--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
--- /dev/null
+Copyright 1999 by Gerd Stolpmann
+
+The package "netstring" is copyright by Gerd Stolpmann.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the "netstring" software (the "Software"), to deal in the
+Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
--- /dev/null
+version = "0.9.3"
+requires = "str"
+description = "String processing for the Internet"
+
+archive(byte) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo"
+archive(byte,toploop) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_top.cmo"
+archive(byte,mt) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_mt.cmo"
+archive(byte,mt,toploop) =
+ "netstring.cma netmappings_iso.cmo netmappings_other.cmo
+ netstring_mt.cmo netstring_top.cmo"
+archive(native) =
+ "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx"
+archive(native,mt) =
+ "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx
+ netstring_mt.cmx"
+
+archive(byte,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo"
+archive(byte,toploop,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_top.cmo"
+archive(byte,mt,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_only_iso) =
+ "netstring.cma netmappings_iso.cmo
+ netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_only_iso) =
+ "netstring.cmxa netmappings_iso.cmx"
+archive(native,mt,netstring_only_iso) =
+ "netstring.cmxa netmappings_iso.cmx
+ netstring_mt.cmx"
+
+archive(byte,netstring_minimum) =
+ "netstring.cma"
+archive(byte,toploop,netstring_minimum) =
+ "netstring.cma
+ netstring_top.cmo"
+archive(byte,mt,netstring_minimum) =
+ "netstring.cma
+ netstring_mt.cmo"
+archive(byte,mt,toploop,netstring_minimum) =
+ "netstring.cma
+ netstring_mt.cmo netstring_top.cmo"
+archive(native,netstring_minimum) =
+ "netstring.cmxa"
+archive(native,mt,netstring_minimum) =
+ "netstring.cmxa
+ netstring_mt.cmx"
--- /dev/null
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files
+# make distclean: remove any superfluous files
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = netstring_str.cmo \
+ netencoding.cmo netbuffer.cmo netstream.cmo \
+ mimestring.cmo cgi.cmo base64.cmo \
+ nethtml_scanner.cmo nethtml.cmo \
+ neturl.cmo \
+ netmappings.cmo netconversion.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = netstring.cma
+XARCHIVE = netstring.cmxa
+
+NAME = netstring
+REQUIRES = str
+
+ISO_MAPPINGS = mappings/iso*.unimap
+OTHER_MAPPINGS = mappings/cp*.unimap \
+ mappings/adobe*.unimap \
+ mappings/jis*.unimap \
+ mappings/koi*.unimap \
+ mappings/mac*.unimap \
+ mappings/windows*.unimap
+
+all: $(ARCHIVE) \
+ netstring_top.cmo netstring_mt.cmo \
+ netmappings_iso.cmo netmappings_other.cmo
+
+opt: $(XARCHIVE) \
+ netstring_mt.cmx \
+ netmappings_iso.cmx netmappings_other.cmx
+
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+netmappings_iso.ml:
+ $(MAKE) -C tools
+ test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+ -o netmappings_iso.ml $(ISO_MAPPINGS)
+
+netmappings_other.ml:
+ $(MAKE) -C tools
+ test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \
+ -o netmappings_other.ml $(OTHER_MAPPINGS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc $(DEBUG) $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt $(OPTIONS) $(ROPTIONS)
+OCAMLLEX = ocamllex
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+DEBUG =
+# Invoke with: make DEBUG=-g
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.PHONY: install
+install: all
+ { test ! -f $(XARCHIVE) || extra="*.cmxa *.a netstring_mt.cmx netmappings_iso.cmx netmappings_other.cmx netstring_mt.o netmappings_iso.o netmappings_other.o"; }; \
+ $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma netstring_top.cmo netstring_mt.cmo netmappings_iso.cmo netmappings_other.cmo META $$extra
+
+.PHONY: install-cgi
+install-cgi:
+ $(OCAMLFIND) install cgi compat-cgi/META
+
+
+.PHONY: install-base64
+install-base64:
+ $(OCAMLFIND) install base64 compat-base64/META
+
+
+.PHONY: uninstall
+uninstall:
+ $(OCAMLFIND) remove $(NAME)
+
+.PHONY: uninstall-cgi
+uninstall-cgi:
+ $(OCAMLFIND) remove cgi
+
+.PHONY: uninstall-base64
+uninstall-base64:
+ $(OCAMLFIND) remove base64
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+ test ! -d mappings || rm -f netmappings_iso.ml netmappings_other.ml
+
+.PHONY: distclean
+distclean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+ rm -f *~ depend depend.pkg compat-cgi/*~ compat-base64/*~
+ $(MAKE) -C tests distclean
+ $(MAKE) -C doc distclean
+ $(MAKE) -C tools distclean
+
+RELEASE: META
+ awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+.PHONY: dist
+dist: RELEASE
+ r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*/doc/readme.dtd" --exclude="*/Mail" --exclude="*/mappings" $(NAME)
+
+.PHONY: tag-release
+tag-release: RELEASE
+ r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME)
+
+.PHONY: release
+release: distclean
+ test -f netmappings_iso.ml
+ test -f netmappings_other.ml
+ $(MAKE) tag-release
+ $(MAKE) dist
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll
+
+.ml.cmx:
+ $(OCAMLOPT) -c -thread $<
+
+.ml.cmo:
+ $(OCAMLC) -c -thread $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ $(OCAMLLEX) $<
+
+include depend
+include depend.pkg
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Thin forwarding wrappers: each function hands its string argument
+ * unchanged to the function of the same name in Netencoding.Base64.
+ *)
+let encode = fun s -> Netencoding.Base64.encode s;;
+let url_encode = fun s -> Netencoding.Base64.url_encode s;;
+let decode = fun s -> Netencoding.Base64.decode s;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.1 2000/03/02 01:15:20 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Base64 compatibility module *)
+(**********************************************************************)
+
+(* PLEASE DO NOT USE THIS MODULE IN NEW SOFTWARE!
+ * The module Netencoding.Base64 is the preferred API. This module is
+ * only for compatibility with older software.
+ *)
+
+(* This interface is compatible with all previously released Base64
+ * modules (0.1 and 0.2).
+ *)
+
+val encode : string -> string (* Compatibility entry point; the implementation
+ * forwards the argument to Netencoding.Base64.encode.
+ *)
+
+val url_encode : string -> string (* Compatibility entry point; the
+ * implementation forwards the argument to Netencoding.Base64.url_encode.
+ *)
+
+val decode : string -> string (* Compatibility entry point; the implementation
+ * forwards the argument to Netencoding.Base64.decode.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/02 01:15:20 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+exception Resources_exceeded (* Raised by really_parse_args when the
+ * CONTENT_LENGTH of a POST request is larger than
+ * config.maximum_content_length.
+ *)
+
+type argument_processing = Memory | File | Automatic;; (* Where the value of
+ * an argument is kept: Memory = in a string, File = in a temporary file,
+ * Automatic = in memory until the buffered value reaches one netstream
+ * block, then in a temporary file.
+ *)
+
+type argument = (* The internal representation of one CGI argument. *)
+ { mutable arg_name : string; (* The name of the argument *)
+ mutable arg_processing : argument_processing; (* How the value is stored;
+ * an Automatic argument is switched to File while it is being read once
+ * it turns out to be large.
+ *)
+ mutable arg_buf_value : Buffer.t; (* Collects the value while a multipart
+ * body is read from a netstream; replaced by a small empty buffer as
+ * soon as the value has been moved to arg_mem_value or to a file.
+ *)
+ mutable arg_mem_value : string option;
+ (* Here, the value is stored if it must be kept in memory *)
+ mutable arg_disk_value : string Weak.t;
+ (* This component is used iff arg_mem_value = None. The
+ * weak array has a length of 1, and the single element stores
+ * the value (if any).
+ *)
+ mutable arg_file : string option;
+ (* The filename of the temporary file storing the value *)
+ mutable arg_fd : out_channel option;
+ (* The output channel of the temp file (if open) *)
+ mutable arg_mimetype : string;
+ mutable arg_filename : string option;
+ mutable arg_header : (string * string) list;
+ (* For the last three components, see the description of the
+ * corresponding functions in the mli file.
+ *)
+ }
+;;
+
+type workaround = (* Switches for browser-bug workarounds, see config.workarounds *)
+ Work_around_MSIE_Content_type_bug (* Checked by get_content_type: cut off a
+ * duplicated CONTENT_TYPE value when the user agent contains MSIE.
+ *)
+ | Work_around_backslash_bug (* Makes the multipart parsers pass
+ * Mimestring.No_backslash_escaping when scanning part headers.
+ *)
+;;
+
+type config = (* Settings consulted while the request is parsed. *)
+ { maximum_content_length : int; (* Upper bound for CONTENT_LENGTH of POST
+ * requests; larger requests raise Resources_exceeded.
+ *)
+ how_to_process_arguments : argument -> argument_processing; (* Called for
+ * every part of a multipart body, with a record that has no value yet.
+ *)
+ tmp_directory : string; (* Directory in which temporary files are created *)
+ tmp_prefix : string; (* First component of temporary file names; a dash
+ * and a counter are appended by make_temporary_file.
+ *)
+ workarounds : workaround list; (* The enabled workarounds *)
+ }
+;;
+
+
+let print_argument arg =
+  (* Printer for the toploop: writes a one-line summary of the argument
+   * with Format.printf. A missing upload filename is shown as "*", and
+   * a value that is not stored in a file is shown as "Memory".
+   *)
+  let upload_name =
+    match arg.arg_filename with
+      Some n -> n
+    | None -> "*" in
+  let storage =
+    match arg.arg_file with
+      Some n -> n
+    | None -> "Memory" in
+  Format.printf
+    "<CGIARG name=%s filename=%s mimetype=%s store=%s>"
+    arg.arg_name upload_name arg.arg_mimetype storage
+;;
+
+
+let encode = Netencoding.Url.encode ;; (* Alias of Netencoding.Url.encode *)
+let decode = Netencoding.Url.decode ;; (* Alias of Netencoding.Url.decode *)
+
+
+
+let url_split_re = (* Matches a single delimiter character of a URL-encoded
+ * parameter string, i.e. either an ampersand or an equals sign. Used by
+ * dest_url_encoded_parameters to tokenize its input.
+ *)
+ Str.regexp "[&=]";;
+
+
+let mk_url_encoded_parameters nv_pairs =
+  (* Builds the string "n1=v1&n2=v2&..." from the list of (name, value)
+   * pairs; every name and value is passed through Netencoding.Url.encode
+   * first. The empty list yields the empty string.
+   *)
+  let encode_pair (name, value) =
+    let n = Netencoding.Url.encode name in
+    let v = Netencoding.Url.encode value in
+    n ^ "=" ^ v
+  in
+  let encoded_pairs = List.map encode_pair nv_pairs in
+  String.concat "&" encoded_pairs
+;;
+
+
+let dest_url_encoded_parameters parstr = (* Splits a URL-encoded parameter
+ * string of the form "n1=v1&n2=v2&..." into a list of (name, value) pairs
+ * and applies Netencoding.Url.decode to every name and value.
+ * The empty string yields the empty list. A value may be empty, but every
+ * parameter needs a non-empty name followed by an equals sign; any other
+ * token sequence (for example a trailing ampersand or a second equals sign
+ * within one parameter) raises Failure "Cgi.dest_url_encoded_parameters".
+ *)
+
+ let rec parse_after_amp tl = (* Expects one parameter at the head of tl *)
+ match tl with
+ Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' ->
+ (Netencoding.Url.decode name,
+ Netencoding.Url.decode value) :: parse_next tl'
+ | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' ->
+ (Netencoding.Url.decode name, "") :: parse_after_amp tl'
+ | Str.Text name :: Str.Delim "=" :: [] ->
+ [Netencoding.Url.decode name, ""]
+ | _ ->
+ failwith "Cgi.dest_url_encoded_parameters"
+ and parse_next tl = (* Expects the end of input or an ampersand *)
+ match tl with
+ [] -> []
+ | Str.Delim "&" :: tl' ->
+ parse_after_amp tl'
+ | _ ->
+ failwith "Cgi.dest_url_encoded_parameters"
+ in
+ let toklist = Str.full_split url_split_re parstr in
+ match toklist with
+ [] -> []
+ | _ -> parse_after_amp toklist
+;;
+
+
+let mk_form_encoded_parameters ntv_triples =
+  (* Placeholder: producing form-encoded data is not implemented, so the
+   * function raises Failure for every input.
+   *)
+  raise (Failure "Cgi.mk_form_encoded_parameters: not implemented")
+;;
+
+
+let dest_parameter_header header options =
+  (* Extracts (name, mime_type, filename) from the header of one part of
+   * a multipart/form-data body.
+   *
+   * header:  the (name, value) pairs of the part header; the names
+   *          "content-type" and "content-disposition" are looked up.
+   * options: passed on to Mimestring.scan_value_with_parameters.
+   *
+   * - mime_type is the content-type value, or "text/plain" if absent.
+   * - name is the "name" parameter of the content-disposition value,
+   *   e.g. form-data; name="fieldname".
+   * - filename is Some of the "filename" parameter, or None if absent.
+   *
+   * Raises Failure if there is no content-disposition entry, if its
+   * value cannot be scanned, or if it has no "name" parameter.
+   *
+   * The content-disposition value is scanned only once; name and
+   * filename are both taken from the same parameter list.
+   *)
+  let mime_type =
+    try List.assoc "content-type" header
+    with Not_found -> "text/plain" in      (* the default *)
+
+  let content_disposition =
+    try List.assoc "content-disposition" header
+    with
+      Not_found ->
+        failwith "Cgi.dest_form_encoded_parameters: no content-disposition"
+  in
+
+  let params =
+    try
+      snd (Mimestring.scan_value_with_parameters content_disposition options)
+    with
+      Failure "Mimestring.scan_value_with_parameters" ->
+        failwith "Cgi.dest_form_encoded_parameters"
+  in
+
+  let name =
+    try List.assoc "name" params
+    with Not_found -> failwith "Cgi.dest_form_encoded_parameters" in
+
+  let filename =
+    try Some (List.assoc "filename" params)
+    with Not_found -> None in
+
+  name, mime_type, filename
+;;
+
+
+let dest_form_encoded_parameters parstr ~boundary config =
+  (* Decodes a complete multipart/form-data body held in the string
+   * parstr and returns one in-memory argument per part, in the order
+   * delivered by Mimestring.scan_multipart_body_and_decode.
+   * config is only consulted for the backslash workaround.
+   *)
+  let options =
+    match List.mem Work_around_backslash_bug config.workarounds with
+      true -> [ Mimestring.No_backslash_escaping ]
+    | false -> []
+  in
+  let parts =
+    Mimestring.scan_multipart_body_and_decode
+      parstr 0 (String.length parstr) boundary in
+  let arg_of_part (part_header, part_value) =
+    let name, mime_type, filename =
+      dest_parameter_header part_header options in
+    { arg_name = name;
+      arg_mimetype = mime_type;
+      arg_filename = filename;
+      arg_header = part_header;
+      arg_processing = Memory;
+      arg_mem_value = Some part_value;
+      arg_buf_value = Buffer.create 1;
+      arg_disk_value = Weak.create 1;
+      arg_file = None;
+      arg_fd = None;
+    }
+  in
+  List.map arg_of_part parts
+;;
+
+
+let make_temporary_file config =
+  (* Creates a new temporary file and returns (filename, out_channel).
+   *
+   * The candidate names are <tmp_directory>/<tmp_prefix>-<n> for
+   * n = 0, 1, 2, ... The file is opened with Open_excl, so an existing
+   * file is never reused; when opening fails with Sys_error the next n
+   * is tried.
+   *
+   * The file is created with mode 0o600: it receives data uploaded by
+   * remote clients and usually lives in a shared directory, so other
+   * local users must not be able to read or modify it.
+   *
+   * Raises Failure once candidate number 1001 has failed, too.
+   *)
+  let rec try_creation n =
+    let fn =
+      Filename.concat
+        config.tmp_directory
+        (config.tmp_prefix ^ "-" ^ (string_of_int n))
+    in
+    try
+      let fd =
+        open_out_gen
+          [ Open_wronly; Open_creat; Open_excl; Open_binary ]
+          0o600
+          fn
+      in
+      fn, fd
+    with
+      Sys_error m ->
+        (* This does not look very intelligent, but it is the only chance
+         * to limit the number of trials.
+         *)
+        if n > 1000 then
+          failwith ("Cgi: Cannot create temporary file: " ^ m);
+        try_creation (n+1)
+  in
+  try_creation 0
+;;
+
+
+let dest_form_encoded_parameters_from_netstream s ~boundary config = (*
+ * Decodes a multipart/form-data body that is read from the netstream s
+ * and returns the arguments in the order in which their parts were
+ * finished. The three callbacks below are handed to
+ * Mimestring.scan_multipart_body_from_netstream. Depending on
+ * config.how_to_process_arguments, a value is collected in memory or
+ * written to a temporary file made by make_temporary_file.
+ * NOTE(review): there is no exception handler here, so an already opened
+ * temporary file apparently stays open (and on disk) if scanning fails -
+ * confirm.
+ *)
+ let parts = ref [] in
+ let options =
+ if List.mem Work_around_backslash_bug config.workarounds then
+ [ Mimestring.No_backslash_escaping ]
+ else
+ []
+ in
+
+ let create header =
+ (* CALLBACK for scan_multipart_body_from_netstream:
+ * Builds the argument record for a part from its header. The record
+ * has no value yet. The processing type is decided by calling
+ * config.how_to_process_arguments on this incomplete record; for File
+ * the temporary file is opened immediately.
+ *)
+ let name, mime_type, filename = dest_parameter_header header options in
+ let p0 =
+ { arg_name = name;
+ arg_processing = Memory;
+ arg_buf_value = Buffer.create 80;
+ arg_mem_value = None;
+ arg_disk_value = Weak.create 1;
+ arg_file = None;
+ arg_fd = None;
+ arg_mimetype = mime_type;
+ arg_filename = filename;
+ arg_header = header;
+ }
+ in
+ let pr = config.how_to_process_arguments p0 in
+ let p = { p0 with arg_processing = pr } in
+ if pr = File then begin
+ let fn, fd = make_temporary_file config in
+ p.arg_file <- Some fn;
+ p.arg_fd <- Some fd;
+ p.arg_mem_value <- None;
+ end;
+ p
+ in
+
+ let add p s k n =
+ (* CALLBACK for scan_multipart_body_from_netstream:
+ * Appends n bytes, starting at position k of the window buffer of the
+ * netstream s, to the value of the argument p.
+ *)
+ if (p.arg_processing = Automatic) &&
+ (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin
+ (* This is a LARGE argument: switch from memory to a temporary file
+ * and move the data buffered so far into that file.
+ *)
+ p.arg_processing <- File;
+ let fn, fd = make_temporary_file config in
+ p.arg_file <- Some fn;
+ p.arg_fd <- Some fd;
+ p.arg_mem_value <- None;
+ output_string fd (Buffer.contents p.arg_buf_value);
+ p.arg_buf_value <- Buffer.create 1;
+ end;
+
+ match p.arg_processing with
+ (Memory|Automatic) ->
+ Buffer.add_substring
+ p.arg_buf_value
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ n
+ | File ->
+ let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+ output
+ fd
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ n;
+ in
+
+ let stop p =
+ (* CALLBACK for scan_multipart_body_from_netstream:
+ * Finishes the argument p. A buffered value becomes arg_mem_value; for
+ * a file argument the channel is closed. The argument is then pushed
+ * onto parts (which is therefore in reverse order).
+ *)
+ begin match p.arg_processing with
+ (Memory|Automatic) ->
+ p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value);
+ p.arg_buf_value <- Buffer.create 1;
+ | File ->
+ let fd = match p.arg_fd with Some fd -> fd | None -> assert false in
+ close_out fd;
+ p.arg_mem_value <- None
+ end;
+ parts := p :: !parts
+ in
+
+ Mimestring.scan_multipart_body_from_netstream
+ s
+ boundary
+ create
+ add
+ stop;
+
+ List.rev !parts
+;;
+
+
+(* getenv:
+ * This function is used instead of Sys.getenv. It never raises Not_found;
+ * a variable that is not set is reported as the empty string.
+ *
+ * Rationale: The CGI specification does not say what should happen if a
+ * certain environment variable is not applicable. Some servers set such
+ * a variable to the empty string, other servers do not set it at all.
+ * Treating both cases alike hides this difference from the rest of the
+ * module.
+ *
+ * This is especially a problem with QUERY_STRING.
+ *)
+let getenv name =
+  try
+    Sys.getenv name
+  with
+    Not_found -> ""
+;;
+
+let mk_simple_arg ~name v =
+  (* Makes an argument called name whose value v is kept in memory.
+   * The MIME type is "text/plain"; there is neither an upload filename
+   * nor a header.
+   *)
+  let empty_buffer = Buffer.create 1 in
+  let no_disk_cache = Weak.create 0 in
+  { arg_name = name;
+    arg_mem_value = Some v;
+    arg_mimetype = "text/plain";
+    arg_filename = None;
+    arg_header = [];
+    arg_processing = Memory;
+    arg_buf_value = empty_buffer;
+    arg_disk_value = no_disk_cache;
+    arg_file = None;
+    arg_fd = None;
+  }
+;;
+
+let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v =
+  (* Makes an argument called name whose value v is kept in memory.
+   * mime defaults to "text/plain", header to the empty list, and the
+   * upload filename is None unless ~filename is passed.
+   *)
+  let empty_buffer = Buffer.create 1 in
+  let no_disk_cache = Weak.create 0 in
+  { arg_name = name;
+    arg_mem_value = Some v;
+    arg_mimetype = mime;
+    arg_filename = filename;
+    arg_header = header;
+    arg_processing = Memory;
+    arg_buf_value = empty_buffer;
+    arg_disk_value = no_disk_cache;
+    arg_file = None;
+    arg_fd = None;
+  }
+;;
+
+let mk_file_arg
+  ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename =
+  (* Makes an argument called name whose value is the contents of the
+   * file v_filename. A relative file name is made absolute immediately
+   * by prepending Sys.getcwd(). mime defaults to "text/plain", header
+   * to the empty list, and the upload filename is None unless ~filename
+   * is passed.
+   *
+   * The weak array must have exactly one slot: as arg_mem_value is None,
+   * arg_value reads and writes element 0 of arg_disk_value to cache the
+   * loaded contents. With an array of length 0 that access raises
+   * Invalid_argument.
+   *)
+  let v_abs_filename =
+    if Filename.is_relative v_filename then
+      Filename.concat (Sys.getcwd()) v_filename
+    else
+      v_filename
+  in
+  { arg_name = name;
+    arg_processing = File;
+    arg_buf_value = Buffer.create 1;
+    arg_mem_value = None;
+    arg_disk_value = Weak.create 1;
+    arg_file = Some v_abs_filename;
+    arg_fd = None;
+    arg_mimetype = mime;
+    arg_filename = filename;
+    arg_header = header;
+  }
+;;
+
+
+let get_content_type config =
+  (* Returns the value of the environment variable CONTENT_TYPE, after
+   * applying the workaround for the MSIE bug if it is enabled in
+   * config.workarounds and the user agent string contains MSIE.
+   *
+   * The bug: When used with SSL connections, Microsoft Internet Explorer
+   * sometimes produces CONTENT_TYPEs like
+   * "multipart/form-data; boundary=..., multipart/form-data; boundary=..."
+   * The workaround throws away everything after ", ".
+   *)
+  let raw_type = getenv "CONTENT_TYPE" in
+  let agent = getenv "HTTP_USER_AGENT" in
+  let workaround_applies =
+    Str.string_match (Str.regexp ".*MSIE") agent 0 &&
+    List.mem Work_around_MSIE_Content_type_bug config.workarounds
+  in
+  if not workaround_applies then
+    raw_type
+  else begin
+    let duplicated =
+      Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary" in
+    if Str.string_match duplicated raw_type 0 then
+      Str.matched_group 1 raw_type
+    else
+      raw_type
+  end
+;;
+
+
+let really_parse_args config =
+  (* Reads the arguments of the current CGI request and returns them as
+   * a list of argument records.
+   *
+   * - GET and HEAD: the arguments are taken from QUERY_STRING.
+   * - POST: exactly CONTENT_LENGTH bytes are read from stdin. The body
+   *   is decoded according to the content type, which must be either
+   *   application/x-www-form-urlencoded or multipart/form-data.
+   *
+   * Raises Resources_exceeded if CONTENT_LENGTH is larger than
+   * config.maximum_content_length, and Failure if the request method or
+   * the content type is unknown, if CONTENT_LENGTH is not a non-negative
+   * integer, or if a multipart content type has no boundary parameter.
+   *)
+  let make_simple_arg (n,v) = mk_simple_arg n v in
+
+  match getenv "REQUEST_METHOD" with
+    ("GET"|"HEAD") ->
+      List.map
+        make_simple_arg
+        (dest_url_encoded_parameters (getenv "QUERY_STRING"))
+
+  | "POST" ->
+      let n =
+        try
+          int_of_string (getenv "CONTENT_LENGTH")
+        with
+          Failure _ -> failwith "Cgi.parse_arguments"
+      in
+      (* A negative length is as malformed as a non-numeric one. Reject
+       * it here; otherwise it would be used as a buffer size below.
+       *)
+      if n < 0 then
+        failwith "Cgi.parse_arguments";
+      if n > config.maximum_content_length then
+        raise Resources_exceeded;
+      begin
+        let mime_type, params =
+          Mimestring.scan_mime_type (get_content_type config) [] in
+        match mime_type with
+          "application/x-www-form-urlencoded" ->
+            let buf = String.create n in
+            really_input stdin buf 0 n;
+            List.map
+              make_simple_arg
+              (dest_url_encoded_parameters buf)
+        | "multipart/form-data" ->
+            let boundary =
+              try
+                List.assoc "boundary" params
+              with
+                Not_found ->
+                  failwith "Cgi.parse_arguments"
+            in
+            (* The body is not loaded as a whole; it is scanned block by
+             * block (4096 bytes) from stdin.
+             *)
+            dest_form_encoded_parameters_from_netstream
+              (Netstream.create_from_channel stdin (Some n) 4096)
+              boundary
+              config
+        | _ ->
+            failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type)
+      end
+  | _ ->
+      failwith "Cgi.parse_arguments: unknown method"
+
+let parsed = ref None;; (* The current list of (name, argument) pairs, or
+ * None as long as neither parse_arguments nor set_arguments has stored
+ * one; protected by lock/unlock *)
+
+let lock = ref (fun () -> ());; (* Called by protect before the protected
+ * function runs. The default does nothing; init_mt installs another one. *)
+let unlock = ref (fun () -> ());; (* Called by protect after the protected
+ * function has returned or raised. The default does nothing. *)
+
+let init_mt new_lock new_unlock =
+  (* Installs the pair of functions that protect calls around every
+   * access to the parsed arguments, replacing the do-nothing defaults.
+   *)
+  begin
+    lock := new_lock;
+    unlock := new_unlock
+  end
+;;
+
+let protect f =
+  (* Runs f() between a call of the current lock function and a call of
+   * the current unlock function, and returns the result of f. If f
+   * raises an exception, unlock is called and the exception is raised
+   * again.
+   *
+   * The unlock call of the normal path is deliberately outside the
+   * exception handler: if unlock itself raises, it is not invoked a
+   * second time.
+   *)
+  !lock();
+  let r =
+    try
+      f()
+    with
+      x ->
+        !unlock();
+        raise x
+  in
+  !unlock();
+  r
+;;
+
+let parse_arguments config =
+  (* Parses the CGI arguments with really_parse_args and stores them as
+   * (name, argument) pairs, unless a set of arguments is already
+   * present; in that case nothing happens and config is ignored.
+   *)
+  let parse_once () =
+    match !parsed with
+      None ->
+        let args = really_parse_args config in
+        parsed := Some (List.map (fun a -> a.arg_name, a) args)
+    | Some _ ->
+        ()
+  in
+  protect parse_once
+;;
+
+let arguments () =
+  (* Returns the stored (name, argument) pairs. Raises
+   * Failure "Cgi.arguments" if no arguments have been stored yet.
+   *)
+  let current () =
+    match !parsed with
+      None -> failwith "Cgi.arguments"
+    | Some plist -> plist
+  in
+  protect current
+;;
+
+let set_arguments arglist =
+  (* Replaces the stored arguments by arglist; every argument is paired
+   * with its own arg_name.
+   *)
+  let with_name arg = (arg.arg_name, arg) in
+  protect
+    (fun () -> parsed := Some (List.map with_name arglist))
+;;
+
+let default_config = (* The configuration used by parse_args and
+ * parse_args_with_mimetypes.
+ *)
+ { maximum_content_length = max_int; (* i.e. no effective limit *)
+ how_to_process_arguments = (fun _ -> Memory); (* never create files *)
+ tmp_directory = "/var/tmp";
+ tmp_prefix = "cgi-"; (* make_temporary_file appends a dash and a number,
+ * so file names look like "cgi--0" *)
+ workarounds = [ Work_around_MSIE_Content_type_bug;
+ Work_around_backslash_bug;
+ ]
+ }
+;;
+
+let arg_value arg =
+  (* Returns the value of the argument as a string.
+   *
+   * - If the value is kept in memory, it is returned directly.
+   * - Otherwise the weak array arg_disk_value is consulted; it caches
+   *   the file contents as long as the GC has not reclaimed them.
+   * - Otherwise the file arg_file is loaded, cached in the weak array,
+   *   and returned.
+   *
+   * Raises Failure "Cgi.arg_value: no value present" if there is neither
+   * a value in memory nor a file, Resources_exceeded if the file is
+   * longer than Sys.max_string_length, and Sys_error or End_of_file on
+   * I/O problems. The input channel is closed in every case.
+   *
+   * A weak array without slot 0 is tolerated: the value is then loaded
+   * from the file on every call instead of raising Invalid_argument.
+   *)
+  let cache = arg.arg_disk_value in
+  let has_slot = Weak.length cache > 0 in
+  match arg.arg_mem_value with
+    Some s ->
+      s
+  | None ->
+      match (if has_slot then Weak.get cache 0 else None) with
+        Some v ->
+          v
+      | None ->
+          match arg.arg_file with
+            None ->
+              failwith "Cgi.arg_value: no value present"
+          | Some filename ->
+              let fd = open_in_bin filename in
+              try
+                let len = in_channel_length fd in
+                (* The value must fit into a single string *)
+                if len > Sys.max_string_length then
+                  raise Resources_exceeded;
+                let s = String.create len in
+                really_input fd s 0 len;
+                if has_slot then
+                  Weak.set cache 0 (Some s);
+                close_in fd;
+                s
+              with
+                any -> close_in fd; raise any
+;;
+
+(* Plain accessors: each one returns the record component of the same
+ * name.
+ *)
+let arg_name a = a.arg_name;;
+let arg_file a = a.arg_file;;
+let arg_mimetype a = a.arg_mimetype;;
+let arg_filename a = a.arg_filename;;
+let arg_header a = a.arg_header;;
+
+let cleanup () =
+  (* Deletes the files of all stored arguments that have one, and resets
+   * arg_file of these arguments to None. Does nothing if no arguments
+   * are stored.
+   *)
+  let discard_file (_, arg) =
+    match arg.arg_file with
+      Some path ->
+        (* We do not complain if the file does not exist anymore. *)
+        if Sys.file_exists path then
+          Sys.remove path;
+        arg.arg_file <- None
+    | None ->
+        ()
+  in
+  protect
+    (fun () ->
+       match !parsed with
+         Some plist -> List.iter discard_file plist
+       | None -> ()
+    )
+;;
+
+(* argument: looks up the argument with the passed name in the stored
+ * arguments (List.assoc raises Not_found if there is none).
+ * argument_value: the same, but returns the value of the argument.
+ *)
+let argument name =
+  let all = arguments () in
+  List.assoc name all;;
+let argument_value name =
+  let arg = argument name in
+  arg_value arg;;
+
+module Operators = struct
+  (* Prefix operator notation: !% is argument, !$ is argument_value *)
+  let ( !% ) name = argument name
+  let ( !$ ) name = argument_value name
+end;;
+
+
+let parse_args() =
+  (* Compatibility function: parses with default_config (unless arguments
+   * are already stored) and returns (name, value) pairs.
+   *)
+  parse_arguments default_config;
+  let value_pair (name, arg) = (name, arg_value arg) in
+  List.map value_pair (arguments())
+;;
+
+let parse_args_with_mimetypes() =
+  (* Compatibility function: parses with default_config (unless arguments
+   * are already stored) and returns (name, mime_type, value) triples.
+   *)
+  parse_arguments default_config;
+  let value_triple (name, arg) = (name, arg_mimetype arg, arg_value arg) in
+  List.map value_triple (arguments())
+;;
+
+let header s =
+  (* Prints the Content-type header line followed by an empty line to
+   * stdout and flushes stdout. The empty string selects "text/html".
+   *)
+  let mime_type = if s = "" then "text/html" else s in
+  print_string (String.concat "" [ "Content-type: "; mime_type; "\n\n" ]);
+  flush stdout
+;;
+
+
+let this_url() =
+  (* Concatenates "http://", SERVER_NAME and SCRIPT_NAME. Unset
+   * environment variables count as empty strings.
+   *)
+  let server = getenv "SERVER_NAME" in
+  let script = getenv "SCRIPT_NAME" in
+  String.concat "" [ "http://"; server; script ]
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.7 2000/06/25 21:40:36 gerd
+ * Added printer.
+ *
+ * Revision 1.6 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.5 2000/05/16 22:29:36 gerd
+ * Added support for two common file upload bugs.
+ *
+ * Revision 1.4 2000/04/15 16:47:27 gerd
+ * Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* FOR SIMPLE CGI PROGRAMS:
+ *
+ * If you do not need all the features of the API below, the following may
+ * be enough:
+ *
+ * - At the beginning of the main program, call 'parse_arguments' with
+ * either 'default_config' as argument or with a customized configuration.
+ * - Use 'argument_value(name)' to get the string value of the CGI parameter
+ * 'name'. If you like, you can also open the Cgi.Operators module and
+ * write '!$ name' instead. Here, !$ is a prefix operator equivalent to
+ * argument_value.
+ *
+ * If you do not change the default configuration, you do not need to
+ * worry about temporary files - there are not any.
+ *
+ * Most of the other functions defined below deal with file uploads, and
+ * are only useful for that.
+ *)
+
+
+(**********************************************************************)
+(* CGI functions *)
+(**********************************************************************)
+
+(* First, the general interface to the CGI argument parser. *)
+
+exception Resources_exceeded (* Raised by parse_arguments if the
+ * CONTENT_LENGTH of a POST request exceeds maximum_content_length of
+ * the configuration.
+ *)
+
+type argument (* The abstract type of a CGI argument; the components are
+ * available through the arg_* accessor functions declared below.
+ *)
+
+type argument_processing =
+ Memory (* Keep the value of the argument in memory *)
+ | File (* Store the value of the argument into a temporary file *)
+ | Automatic (* Store only large arguments into files. An argument
+ * value is large if it is longer than about one block (4K).
+ * This is not an exact definition.
+ *)
+
+type workaround =
+ Work_around_MSIE_Content_type_bug
+ (* There is a bug in MSIE I observed together with SSL connections.
+ * The CONTENT_TYPE passed to the server has sometimes the wrong
+ * format. This option enables a workaround if the user agent string
+ * contains the word "MSIE".
+ *)
+ | Work_around_backslash_bug
+ (* There is a bug in many browsers: The backslash character is not
+ * handled as an escaping character in MIME headers. Because DOS-
+ * based systems use the backslash regularly in filenames, this bug
+ * matters.
+ * This option changes the interpretation of backslashes such that
+ * these are handled as normal characters. I do not know any browser
+ * that is not affected by this bug, so there is no check on
+ * the user agent string.
+ *)
+
+
+type config =
+ { maximum_content_length : int;
+ (* The maximum CONTENT_LENGTH. Bigger requests trigger an
+ * Resources_exceeded exception. This feature can be used
+ * to detect primitive denial-of-service attacks.
+ *)
+ how_to_process_arguments : argument -> argument_processing;
+ (* After the beginning of an argument has been decoded, the
+ * type of processing is decided by invoking this function on
+ * the argument. Note that the passed argument is incomplete -
+ * it does not have a value. You can assume that name, filename,
+ * MIME type and the whole header are already known.
+ * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS
+ * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION.
+ * All other transport methods can only handle the Memory
+ * processing type.
+ *)
+ tmp_directory : string;
+ (* The temporary directory to use for the temporary files. *)
+ tmp_prefix : string;
+ (* A prefix for temporary files. It is recommended that the prefix
+ * contains a part that is random or that depends on rapidly changing
+ * environment properties. For example, the process ID is a good
+ * candidate, or the current system time. It is not required that
+ * the prefix is unique; there is a fail-safe algorithm that
+ * computes a unique file name from the prefix, even if several
+ * CGI programs run concurrently.
+ *)
+ workarounds : workaround list;
+ (* Specifies which workarounds should be enabled. *)
+ }
+
+val parse_arguments : config -> unit
+val arguments : unit -> (string * argument) list
+ (* - let () = parse_arguments config:
+ * Decodes the CGI arguments. 'config' specifies limits and processing
+ * hints; you can simply pass default_config (see below).
+ *
+ * - let arglist = arguments():
+ * The function returns a list with (name, arg) pairs. The name is
+ * passed back as string while the value is returned as opaque type
+ * 'argument'. Below accessor functions are defined. These functions
+ * require that parse_arguments was invoked before.
+ *
+ * Note 1: You can invoke 'parse_arguments' several times, but only
+ * the first time the arguments are read in. If you call the function
+ * again, it does nothing (even if the config changes). This is also
+ * true if 'parse_arguments' has been invoked after 'set_arguments'.
+ *
+ * Note 2: It is not guaranteed that stdin has been read until EOF.
+ * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec).
+ *
+ * Note 3: If arguments are processed in File or Automatic mode, the
+ * caller of 'parse_arguments' is responsible for deleting the files
+ * after use. You may consider to apply the at_exit function of the
+ * core library for this purpose. See also 'cleanup' below.
+ *)
+
+val set_arguments : argument list -> unit
+ (* Alternatively, you can set the arguments to use. This overrides any
+ * previously parsed set of arguments, and also any following parsing.
+ * - Intended for debugging, and to make it possible to replace the
+ * CGI parser by a different one while retaining this API.
+ *)
+
+val default_config : config
+ (* maximum_content_length = max_int
+ * how_to_process_arguments = "use always Memory"
+ * tmp_directory = "/var/tmp"
+ * tmp_prefix = "cgi-"
+ * workarounds = [ Work_around_MSIE_Content_type_bug;
+ * Work_around_backslash_bug;
+ * ]
+ *
+ * Note 1: On some Unixes, a special file system is used for /tmp that
+ * stores the files into the virtual memory (main memory or swap area).
+ * Because of this, /var/tmp is preferred as default.
+ *
+ * Note 2: Filename.temp_file is not used because it depends on
+ * environment variables which are usually not set in a CGI environment.
+ *)
+
+val arg_name : argument -> string
+val arg_value : argument -> string
+val arg_file : argument -> string option
+val arg_mimetype : argument -> string
+val arg_filename : argument -> string option
+val arg_header : argument -> (string * string) list
+ (* The accessor functions that return several aspects of arguments.
+ * arg_name: returns the name of the argument
+ * arg_value: returns the value of the argument. If the value is stored
+ * in a temporary file, the contents of this file are returned, i.e.
+ * the file is loaded. This may have some consequences:
+ * (1) The function may fail because of I/O errors.
+ * (2) The function may be very slow, especially if the file is
+ * non-local.
+ * (3) If the value is bigger than Sys.max_string_length, the function
+ * raises the exception Resources_exceeded. On 32 bit architectures,
+ * strings are limited to 16 MB.
+ * Note that loaded values are put into weak arrays. This makes it
+ * possible that subsequent calls of 'arg_value' on the same argument
+ * can avoid loading the value again, and that unused values will
+ * nevertheless be collected by the GC.
+ * arg_file: returns 'Some filename' if the value resides in a temporary
+ * file, and 'filename' is the absolute path of this file. If the
+ * value is only available in memory, None is returned.
+ * arg_mimetype: returns the MIME type of the argument. Note that the
+ * default MIME type is "text/plain", and that the default is returned
+ * if the MIME type is not available.
+ * arg_filename: returns 'Some filename' if the argument is associated
+ * with a certain filename (e.g. from a file upload); otherwise None
+ * arg_header: returns pairs (name,value) containing the complete header
+ * of the argument. If the transmission protocol does not specify
+ * a header, the empty list is passed back.
+ *)
+
+val mk_simple_arg : name:string -> string -> argument
+ (* mk_simple_arg name value:
+ * Creates a simple argument with only name, and a value passed by string.
+ * The MIME type is "text/plain".
+ *)
+
+val mk_memory_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_memory_arg name mimetype filename header value:
+ * Creates an argument whose value is kept in memory.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+val mk_file_arg
+ : name:string -> ?mime:string -> ?filename:string ->
+ ?header:((string * string) list) -> string -> argument
+ (* mk_file_arg name mimetype filename header value_filename:
+ * Creates an argument whose value is stored in the file
+ * 'value_filename'. If this file name is not absolute, it is interpreted
+ * relative to the directory returned by Sys.getcwd() - this might not
+ * be what you want with respect to mount points and symlinks (and it
+ * depends on the operating system as getcwd is only POSIX.1). The
+ * file name is turned into an absolute name immediately, and the
+ * function arg_file returns the rewritten name.
+ *
+ * Note: The signature of this function changed in release 0.8.
+ *)
+
+
+val cleanup : unit -> unit
+ (* Removes all temporary files that occur in the current set of arguments
+ * (as returned by 'arguments()').
+ *)
+
+
+(* Convenience functions: *)
+
+val argument : string -> argument
+ (* let argument name = List.assoc name (arguments()) -- i.e. returns
+ * the argument with the passed name. Of course, this function expects
+ * that 'parse_arguments' was called before.
+ *)
+
+val argument_value : string -> string
+ (* let argument_value name = arg_value(argument name) -- i.e. returns
+ * the value of the argument.
+ * See also Operators.( !$ ) below.
+ *)
+
+(* For toploop printers: *)
+
+val print_argument : argument -> unit
+
+
+(* Now, the compatibility functions. *)
+
+val parse_args : unit -> (string * string) list
+ (* Decodes the arguments of the CGI and returns them as an association list
+ * Works whatever the method is (GET or POST)
+ *)
+
+val parse_args_with_mimetypes : unit -> (string * string * string) list
+ (* Like parse_args, but returns also the MIME type.
+ * The triples contain (name, mime_type, value).
+ * If an encoding was chosen that does not transfer the MIME type,
+ * "text/plain" is returned.
+ *
+ * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED.
+ * It was included in netstring-0.4, but most people want more than just
+ * the MIME type. parse_arguments should be used instead.
+ *)
+
+val header : string -> unit
+ (* Prints the content-type header.
+ * the argument is the MIME type (default value is "text/html" if the
+ * argument is the empty string)
+ *)
+
+val this_url : unit -> string
+ (* Returns the address of the CGI *)
+
+(**********************************************************************)
+(* The Operators module *)
+(**********************************************************************)
+
+(* If you open the Operators module, you can write
+ * !% "name" instead of argument "name", and
+ * !$ "name" instead of argument_value "name"
+ *)
+
+module Operators : sig
+ val ( !% ) : string -> argument
+ (* same as 'argument' above *)
+ val ( !$ ) : string -> string
+ (* same as 'argument_value' above *)
+end
+
+(**********************************************************************)
+(* Low-level functions *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * See also the Netencoding module. This interface contains these functions
+ * to keep the compatibility with the old Cgi module.
+ *)
+
+val decode : string -> string (* decode s: undoes the '%'-substitution
+ * described above and returns the resulting string. *)
+val encode : string -> string (* encode s: applies the '%'-substitution
+ * described above to s and returns the resulting string. *)
+
+(* URL-encoded parameters:
+ *
+ * The following two functions create and analyze URL-encoded parameters.
+ * Format: name1=val1&name2=val2&...
+ *)
+
+val mk_url_encoded_parameters : (string * string) list -> string
+ (* The argument is a list of (name,value) pairs. The result is the
+ * single URL-encoded parameter string.
+ *)
+
+val dest_url_encoded_parameters : string -> (string * string) list
+ (* The argument is the URL-encoded parameter string. The result is
+ * the corresponding list of (name,value) pairs.
+ * Note: Whitespace within the parameter string is ignored.
+ * If there is a format error, the function fails.
+ *)
+
+(* Form-encoded parameters:
+ *
+ * According to: RFCs 2388, 2183, 2045, 2046
+ *
+ * General note: This is a simple API to encode/decode form-encoded parameters.
+ * Especially, it is not possible to pass the header of the parts through
+ * this API.
+ *)
+
+val mk_form_encoded_parameters : (string * string * string) list ->
+ (string * string)
+ (* The argument is a list of (name,mimetype,value) triples.
+ * The result is (parstr, boundary), where 'parstr' is the
+ * single form-encoded parameter string, and 'boundary' is the
+ * boundary to separate the message parts.
+ *
+ * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED!
+ *)
+
+val dest_form_encoded_parameters : string -> boundary:string -> config ->
+ argument list
+ (* The first argument is the form-encoded parameter string.
+ * The second argument is the boundary (extracted from the mime type).
+ * Third argument: Only the workarounds component is used.
+ * The result is
+ * the corresponding list of arguments (all in memory).
+ * If there is a format error, the function fails.
+ * Note: embedded multipart/mixed types are returned as they are,
+ * and are not recursively decoded.
+ * Note: The content-transfer-encodings "7bit", "8bit", "binary",
+ * "base64", and "quoted-printable" are supported.
+ * Note: Parameter names which include spaces or non-alphanumeric
+ * characters may be problematic (the rules of RFC 2047 are NOT applied).
+ * Note: The returned MIME type is not normalized.
+ *)
+
+val dest_form_encoded_parameters_from_netstream
+ : Netstream.t -> boundary:string -> config -> argument list
+ (* let arglist = dest_form_encoded_parameters_from_netstream s b c:
+ * Reads the form-encoded parameters from netstream s. The boundary
+ * is passed in b, and the configuration in c.
+ * A list of arguments is returned.
+ *
+ * See also dest_form_encoded_parameters.
+ *
+ * Restriction: In contrast to dest_form_encoded_parameters, this
+ * function is not able to handle the content-transfer-encodings
+ * "base64" and "quoted-printable". (This is not really a restriction
+ * because no browser uses these encodings in conjunction with HTTP.
+ * This is different if mail transport is chosen. - The reason for
+ * this restriction is that there are currently no stream functions
+ * for decoding.)
+ *)
+
+(* Private functions: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit (* Internal hook, not
+ * part of the public API. NOTE(review): presumably used by Netstring_mt to
+ * install the lock and unlock functions for multi-threaded programs; confirm
+ * against cgi.ml and netstring_mt.ml. *)
+
+
+(**********************************************************************)
+(* Compatibility with CGI library by J.-C. Filliatre *)
+(**********************************************************************)
+
+(* The following functions are compatible with J.-C. Filliatre's CGI
+ * library:
+ *
+ * parse_args, header, this_url, decode, encode.
+ *
+ * Note that the new implementation of parse_args can be safely invoked
+ * several times.
+ *
+ * Since release 0.8, Netstring's CGI implementation is again thread-safe.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.7 2000/06/25 21:40:36 gerd
+ * Added printer.
+ *
+ * Revision 1.6 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.5 2000/05/16 22:28:13 gerd
+ * New "workarounds" config component.
+ *
+ * Revision 1.4 2000/04/15 16:47:27 gerd
+ * Last minor changes before releasing 0.6.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with base64"
--- /dev/null
+version = "0.5"
+requires = "netstring"
+description = "Compatibility with cgi"
--- /dev/null
+base64.cmo: netencoding.cmi base64.cmi
+base64.cmx: netencoding.cmx base64.cmi
+cgi.cmo: mimestring.cmi netbuffer.cmi netencoding.cmi netstream.cmi cgi.cmi
+cgi.cmx: mimestring.cmx netbuffer.cmx netencoding.cmx netstream.cmx cgi.cmi
+mimestring.cmo: netbuffer.cmi netencoding.cmi netstream.cmi netstring_str.cmi \
+ mimestring.cmi
+mimestring.cmx: netbuffer.cmx netencoding.cmx netstream.cmx netstring_str.cmx \
+ mimestring.cmi
+netbuffer.cmo: netbuffer.cmi
+netbuffer.cmx: netbuffer.cmi
+netconversion.cmo: netmappings.cmi netconversion.cmi
+netconversion.cmx: netmappings.cmx netconversion.cmi
+netencoding.cmo: netstring_str.cmi netencoding.cmi
+netencoding.cmx: netstring_str.cmx netencoding.cmi
+nethtml.cmo: nethtml.cmi
+nethtml.cmx: nethtml.cmi
+netmappings.cmo: netmappings.cmi
+netmappings.cmx: netmappings.cmi
+netmappings_iso.cmo: netmappings.cmi
+netmappings_iso.cmx: netmappings.cmx
+netmappings_other.cmo: netmappings.cmi
+netmappings_other.cmx: netmappings.cmx
+netstream.cmo: netbuffer.cmi netstream.cmi
+netstream.cmx: netbuffer.cmx netstream.cmi
+netstring_mt.cmo: cgi.cmi netmappings.cmi netstring_str.cmi netstring_mt.cmi
+netstring_mt.cmx: cgi.cmx netmappings.cmx netstring_str.cmx netstring_mt.cmi
+netstring_str.cmo: netstring_str.cmi
+netstring_str.cmx: netstring_str.cmi
+netstring_top.cmo: netstring_top.cmi
+netstring_top.cmx: netstring_top.cmi
+neturl.cmo: netencoding.cmi neturl.cmi
+neturl.cmx: netencoding.cmx neturl.cmi
+cgi.cmi: netstream.cmi
+mimestring.cmi: netstream.cmi
+netmappings.cmi: netconversion.cmi
+netstream.cmi: netbuffer.cmi
--- /dev/null
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components
+(packages), and includes tools that support this scheme. Packages are
+collections of OCaml modules for which metainformation can be stored. The
+packages are kept in the filesystem hierarchy, but with strict directory
+structure. The library contains functions to look up the directory that stores
+a package, to query metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows the user
+to enter queries on the command-line. In order to simplify compilation and
+linkage, there are new frontends of the various OCaml compilers that can
+directly deal with packages.
+
+Together with the packages metainformation is stored. This includes a version
+string, the archives the package consists of, and additional linker options.
+Packages can also be dependent on other packages. There is a query which finds
+out all predecessors of a list of packages and sorts them topologically. The
+new compiler frontends do this implicitly.
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is
+mainly used to be able to react to certain properties of the environment, such
+as if the bytecode or the native compiler is invoked, if the application is
+multi-threaded, and a few more. If the new compiler frontends are used, most
+predicates are found out automatically.
+
+There is special support for scripts. A new directive, "#require", loads
+packages into scripts. Of course, this works only with newly created toploops
+which include the findlib library.
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here
+[2].
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY f "<em>findlib</em>">
+<!ENTITY F "<em>Findlib</em>">
+
+]>
+
+<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
+ <sect1>
+ <title>Abstract</title>
+<p>
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+</p>
+
+<p>
+Together with the packages metainformation is stored. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also be dependent on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+</p>
+
+<p>
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to be able to react to certain
+properties of the environment, such as if the bytecode or the native
+compiler is invoked, if the application is multi-threaded, and a few
+more. If the new compiler frontends are used, most predicates are
+found out automatically.
+</p>
+
+<p>
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+</p>
+
+ </sect1>
+
+ <sect1><title>Where to get findlib</title>
+ <p>
+The manual of &f; is available <a href="&url.findlib-project;">online</a>.
+You can download &f; <a href="&url.findlib-download;">here</a>.
+</p>
+ </sect1>
+</readme>
--- /dev/null
+******************************************************************************
+INSTALL - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+The "Netstring" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+Netstring does not need any other packages besides the O'Caml core. Netstring
+needs at least O'Caml 3.00. The installation procedure defined in the Makefile
+requires findlib [1] to work [2].
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure "Netstring".
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make all
+ compiles with the bytecode compiler and creates netstring.cma,
+ netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
+ netmappings_other.cmo
+
+- make opt
+ compiles with the native compiler and creates netstring.cmxa,
+ netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx
+
+The archive netstring.cmx?a contains the functionality, and the two
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
+configurations to the character set conversion module. These configurations are
+optional:
+
+- Netmappings_iso: Conversion tables for the character sets ISO-8859-2, -3,
+ -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.
+
+- Netmappings_other: Conversion tables for the character sets WINDOWS-1250,
+ -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; code pages 037, 424,
+ 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, 862, 863, 864, 865,
+ 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh Roman encoding;
+ Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats Encodings.
+
+Even without these configuration files, the conversion module is able to handle
+the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant of
+UTF-8.
+
+The module Netstring_mt must be linked into multi-threaded applications;
+otherwise some mutexes remain uninitialized.
+
+The module Netstring_top loads several printers for abstract values (for
+toploops).
+
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make install
+ installs the bytecode archive, the interface definitions, and if present,
+ the native archive in the default location of findlib
+
+- make install-cgi
+ Installs a pseudo package "cgi" which is compatible with the old cgi
+ package. This has the effect that software searching the "cgi" package will
+ find the netstring package instead. This is recommended.
+
+- make install-base64
+ Installs a pseudo package "base64" which is compatible with the old base64
+ package. This has the effect that software searching the "base64" package
+ will find the netstring package instead. This is recommended.
+
+- make uninstall
+ removes the package
+
+- make uninstall-cgi
+ removes the "cgi" compatibility package
+
+- make uninstall-base64
+ removes the "base64" compatibility package
+
+------------------------------------------------------------------------------
+Linking netstring with findlib
+------------------------------------------------------------------------------
+
+The command
+
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+
+links as much code as possible from netstring into your application: All
+conversion tables; when -thread is specified, the initialization code for
+multi-threaded programs; when a toploop is created, the code setting the value
+printers.
+
+The following predicates reduce the amount of linked code:
+
+- netstring_only_iso: Only the conversion tables for the ISO-8859 series of
+ character sets are linked.
+
+- netstring_minimum: No additional conversion tables are linked; only
+ ISO-8859-1 and the UTF encodings work.
+
+For example, the command may look like
+
+ocamlfind ocamlc ...
+ -package netstring -predicates netstring_only_iso ... -linkpkg ...
+
+to link only the ISO-8859 conversion tables.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY m "<em>Netstring</em>">
+
+]>
+
+<readme title="INSTALL - Netstring, string processing functions for the net">
+ <sect1><title>The "Netstring" package</title>
+ <sect2><title>Prerequisites</title>
+ <p>
+&m; does not need any other packages besides the O'Caml core. &m; needs
+at least O'Caml 3.00. The installation procedure defined in the Makefile
+requires <a href="&url.findlib-project;">findlib</a> to
+work<footnote><em>Findlib</em> is a package manager, see the file
+ABOUT-FINDLIB.</footnote>.
+</p>
+ </sect2>
+
+ <sect2><title>Configuration</title>
+ <p>
+It is not necessary to configure "Netstring".
+</p>
+ </sect2>
+
+ <sect2><title>Compilation</title>
+ <p>
+The Makefile defines the following goals:
+</p>
+ <ul>
+ <li>
+ <p>make all</p>
+ <p>compiles with the bytecode compiler and creates netstring.cma,
+netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and
+netmappings_other.cmo</p>
+ </li>
+ <li>
+ <p>make opt</p>
+ <p>compiles with the native compiler and creates netstring.cmxa,
+netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx</p>
+ </li>
+ </ul>
+
+ <p>The archive netstring.cmx?a contains the functionality, and the two
+single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add
+configurations to the character set conversion module. These configurations are
+optional:</p>
+
+ <ul>
+ <li><p>Netmappings_iso: Conversion tables for the character sets
+ISO-8859-2, -3, -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.</p>
+ </li>
+ <li><p>Netmappings_other: Conversion tables for the character sets
+WINDOWS-1250, -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258;
+code pages 037, 424, 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861,
+862, 863, 864, 865, 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh
+Roman encoding; Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats
+Encodings.</p>
+ </li>
+ </ul>
+
+<p>Even without these configuration files, the conversion module is able to
+handle the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant
+of UTF-8.</p>
+
+<p>The module Netstring_mt must be linked into multi-threaded applications;
+otherwise some mutexes remain uninitialized.</p>
+
+<p>The module Netstring_top loads several printers for abstract values (for
+toploops).</p>
+
+ </sect2>
+
+ <sect2><title>Installation</title>
+ <p>
+The Makefile defines the following goals:</p>
+ <ul>
+ <li>
+ <p>make install</p>
+ <p>installs the bytecode archive, the interface definitions, and if
+present, the native archive in the default location of <em>findlib</em>
+</p>
+ </li>
+
+ <li>
+ <p>make install-cgi</p>
+ <p>Installs a pseudo package "cgi" which is compatible with the old
+cgi package. This has the effect that software searching the "cgi" package will
+find the netstring package instead. This is recommended.</p>
+ </li>
+
+ <li>
+ <p>make install-base64</p> <p>Installs a pseudo package "base64"
+which is compatible with the old base64 package. This has the effect that
+software searching the "base64" package will find the netstring package
+instead. This is recommended.</p>
+ </li>
+
+ <li>
+ <p>make uninstall</p>
+ <p>removes the package</p>
+ </li>
+
+ <li>
+ <p>make uninstall-cgi</p>
+ <p>removes the "cgi" compatibility package</p>
+ </li>
+
+ <li>
+ <p>make uninstall-base64</p>
+ <p>removes the "base64" compatibility package</p>
+ </li>
+ </ul>
+ </sect2>
+
+
+ <sect2>
+ <title>Linking netstring with findlib</title>
+ <p>The command
+<code>
+ocamlfind ocamlc ... -package netstring ... -linkpkg ...
+</code>
+links as much code as possible from netstring into your application: All
+conversion tables; when -thread is specified, the initialization code for
+multi-threaded programs; when a toploop is created, the code setting the value
+printers.</p>
+
+<p>The following predicates reduce the amount of linked code:</p>
+
+ <ul>
+ <li><p>netstring_only_iso: Only the conversion tables for the ISO-8859
+series of character sets are linked.</p>
+ </li>
+ <li><p>netstring_minimum: No additional conversion tables are linked;
+only ISO-8859-1 and the UTF encodings work.</p>
+ </li>
+ </ul>
+
+<p>For example, the command may look like
+
+<code>
+ocamlfind ocamlc ...
+ -package netstring -predicates netstring_only_iso ... -linkpkg ...
+</code>
+
+to link only the ISO-8859 conversion tables.</p>
+ </sect2>
+
+ </sect1>
+</readme>
\ No newline at end of file
--- /dev/null
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB
+
+README: README.xml common.xml config.xml
+ readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+ readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+ readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+
--- /dev/null
+******************************************************************************
+README - Netstring, string processing functions for the net
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+Netstring is a collection of string processing functions that are useful in
+conjunction with Internet messages and protocols. In particular, it contains
+functions for the following purposes:
+
+- Parsing MIME messages
+
+- Several encoding/decoding functions (Base 64, Quoted Printable, Q,
+ URL-encoding)
+
+- A new implementation of the CGI interface that allows users to upload files
+
+- A simple HTML parser
+
+- URL parsing, printing and processing
+
+- Conversion between character sets
+
+==============================================================================
+Download
+==============================================================================
+
+You can download Netstring as gzip'ed tarball [1].
+
+==============================================================================
+Documentation
+==============================================================================
+
+Sorry, there is no manual. The mli files describe each function in detail.
+Furthermore, the following additional information may be useful.
+
+------------------------------------------------------------------------------
+New CGI implementation
+------------------------------------------------------------------------------
+
+For a long time, the CGI implementation by Jean-Christophe Filliatre has been
+the only freely available module that implemented the CGI interface (it is also
+based on code by Daniel de Rauglaudre). It worked well, but it did not support
+file uploads because this requires a parser for MIME messages.
+
+The main goal of Netstring is to realize such uploads, and because of this it
+contains an almost complete parser for MIME messages.
+
+The new CGI implementation provides the same functions as the old one, and
+some extensions. If you call Cgi.parse_args(), you get the CGI parameters as
+before, but as already explained this also works if the parameters are
+encapsulated as a MIME message. In the HTML code, you can select the MIME format
+by using
+
+<form action="..." method="post" enctype="multipart/form-data">
+...
+</form>
+
+
+- this "enctype" attribute forces the browser to send the form parameters as
+multipart MIME message (Note: You can neither send the parameters of a
+conventional hyperlink as MIME message nor the form parameters if the "method"
+is "get"). In many browsers only this particular encoding enables the file
+upload elements; you cannot perform file uploads with other encodings.
+
+As MIME messages can transport MIME types, filename, and other additional
+properties, it is also possible to get these using the enhanced interface.
+After calling
+
+Cgi.parse_arguments config
+
+you can get all available information about a certain parameter by invoking
+
+let param = Cgi.argument "name"
+
+- where "param" has the type "argument". There are several accessor functions
+to extract the various aspects of arguments (name, filename, value by string,
+value by temporary file, MIME type, MIME header) from "argument" values.
+
+------------------------------------------------------------------------------
+Base64, and other encodings
+------------------------------------------------------------------------------
+
+Netstring is also the successor of the Base64 package. It provides a Base64
+compatible interface, and an enhanced API. The latter is contained in the
+Netencoding module which also offers implementations of the "quoted printable",
+"Q", and "URL" encodings. Please see netencoding.mli for details.
+
+------------------------------------------------------------------------------
+The MIME scanner functions
+------------------------------------------------------------------------------
+
+In the Mimestring module you can find several functions scanning parts of MIME
+messages. These functions already cover most aspects of MIME messages: Scanning
+of headers, analysis of structured header entries, and scanning of multipart
+bodies. Of course, a full-featured MIME scanner would require some more
+functions, especially concrete parsers for frequent structures (mail addresses
+or date strings).
+
+Please see the file mimestring.mli for details.
+
+------------------------------------------------------------------------------
+The HTML parser
+------------------------------------------------------------------------------
+
+The HTML parser should be able to read every HTML file; whether it is correct
+or not. The parser tries to recover from parsing errors as much as possible.
+
+The parser returns the HTML term as conventional recursive value (i.e. no
+object-oriented design).
+
+The parser depends a bit on knowledge about the HTML version; mainly because it
+needs to know the tags that are always empty. You may need to adjust this
+configuration before the parser works well enough for your purpose.
+
+Please see the Nethtml module for details.
+
+------------------------------------------------------------------------------
+The abstract data type URL
+------------------------------------------------------------------------------
+
+The module Neturl contains support for URL parsing and processing. The
+implementation follows strictly the standards RFC 1738 and RFC 1808. URLs can
+be parsed, and several accessor functions allow the user to get components of
+parsed URLs, or to change components. Modifying URLs is safe; it is impossible
+to create a URL that does not have a valid string representation.
+
+Both absolute and relative URLs are supported. It is possible to apply a
+relative URL to a base URL in order to get the corresponding absolute URL.
+
+------------------------------------------------------------------------------
+Conversion between character sets and encodings
+------------------------------------------------------------------------------
+
+The module Netconversion converts strings from one character set to another.
+It is Unicode-based, and there are conversion tables for more than 50
+encodings.
+
+==============================================================================
+Author, Copying
+==============================================================================
+
+Netstring has been written by Gerd Stolpmann [2]. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+
+==============================================================================
+History
+==============================================================================
+
+- Changed in 0.9.3: Fixed a bug in the "install" rule of the Makefile.
+
+- Changed in 0.9.2: New format for the conversion tables which are now much
+ smaller.
+
+- Changed in 0.9.1: Updated the Makefile such that (native-code) compilation
+ of netmappings.ml becomes possible.
+
+- Changed in 0.9: Extended Mimestring module: It can now process RFC-2047
+ messages.
+ New Netconversion module which converts strings between character encodings.
+
+- Changed in 0.8.1: Added the component url_accepts_8bits to
+ Neturl.url_syntax. This helps processing URLs which intentionally contain
+ bytes >= 0x80.
+ Fixed a bug: Every URL containing a 'j' was malformed!
+
+- Changed in 0.8: Added the module Neturl which provides the abstract data
+ types of URLs.
+ The whole package is now thread-safe.
+ Added printers for the various opaque data types.
+ Added labels to function arguments where appropriate. The following
+ functions changed their signatures significantly: Cgi.mk_memory_arg,
+ Cgi.mk_file_arg.
+
+- Changed in 0.7: Added workarounds for frequent browser bugs. Some functions
+ take now an additional argument specifying which workarounds are enabled.
+
+- Changed in 0.6.1: Updated URLs in documentation.
+
+- Changed in 0.6: The file upload has been re-implemented to support large
+ files; the file is now read block by block and the blocks can be collected
+ either in memory or in a temporary file.
+ Furthermore, the CGI API has been revised. There is now an opaque data type
+ "argument" that hides all implementation details and that is extensible (if
+ necessary, it is possible to add features without breaking the interface
+ again).
+ The CGI argument parser can be configured; currently it is possible to limit
+ the size of uploaded data, to control by which method arguments are
+ processed, and to set up where temporary files are created.
+ The other parts of the package that have nothing to do with CGI remain
+ unchanged.
+
+- Changed in 0.5.1: A mistake in the documentation has been corrected.
+
+- Initial version 0.5: The Netstring package wants to be the successor of the
+ Base64-0.2 and the Cgi-0.3 packages. The sum of both numbers is 0.5, and
+ because of this, the first version number is 0.5.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/netstring-0.9.2.tar.gz
+
+[2] see mailto:gerd@gerd-stolpmann.de
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - Netstring, string processing functions for the net">
+ <sect1>
+ <title>Abstract</title>
+ <p>
+<em>Netstring</em> is a collection of string processing functions that are
+useful in conjunction with Internet messages and protocols. In particular,
+it contains functions for the following purposes:</p>
+
+<ul>
+ <li><p>Parsing MIME messages</p>
+ </li>
+ <li><p>Several encoding/decoding functions (Base 64, Quoted Printable, Q, URL-encoding)</p>
+ </li>
+ <li><p>A new implementation of the CGI interface that allows users to upload files</p>
+ </li>
+ <li><p>A simple HTML parser</p>
+ </li>
+ <li><p>URL parsing, printing and processing</p>
+ </li>
+ <li><p>Conversion between character sets</p>
+ </li>
+ </ul>
+
+ </sect1>
+
+ <sect1>
+ <title>Download</title>
+ <p>
+You can download <em>Netstring</em> as gzip'ed <a
+ href="&url.netstring-download;">tarball</a>.
+</p>
+
+ </sect1>
+
+ <sect1>
+ <title>Documentation</title>
+ <p>
+Sorry, there is no manual. The mli files describe each function in
+detail. Furthermore, the following additional information may be useful.</p>
+
+ <sect2>
+ <title>New CGI implementation</title>
+
+ <p>For a long time, the CGI implementation by Jean-Christophe Filliatre
+has been the only freely available module that implemented the CGI interface
+(it is also based on code by Daniel de Rauglaudre). It worked well, but it did
+not support file uploads because this requires a parser for MIME messages.</p>
+ <p>The main goal of Netstring is to realize such uploads, and because of
+this it contains an almost complete parser for MIME messages.</p>
+ <p>The new CGI implementation provides the same functions as the old
+one, and some extensions. If you call Cgi.parse_args(), you get the CGI
+parameters as before, but as already explained this also works if the
+parameters are encapsulated as MIME message. In the HTML code, you can select
+the MIME format by using
+<code><![CDATA[
+<form action="..." method="post" enctype="multipart/form-data">
+...
+</form>
+]]>
+</code>
+- this "enctype" attribute forces the browser to send the form parameters
+as multipart MIME message (Note: You can neither send the parameters of a
+conventional hyperlink as MIME message nor the form parameters if the
+"method" is "get"). In many browsers only this particular encoding enables
+the file upload elements, you cannot perform file uploads with other encodings.
+</p>
+
+ <p>As MIME messages can transport MIME types, filename, and other
+additional properties, it is also possible to get these using the enhanced
+interface. After calling
+<code><![CDATA[
+Cgi.parse_arguments config
+]]></code>
+you can get all available information about a certain parameter by invoking
+<code><![CDATA[
+let param = Cgi.argument "name"
+]]></code>
+- where "param" has the type "argument". There are several accessor functions
+to extract the various aspects of arguments (name, filename, value by string,
+value by temporary file, MIME type, MIME header) from "argument" values.
+</p>
+
+ </sect2>
+
+
+ <sect2>
+ <title>Base64, and other encodings</title>
+
+ <p>Netstring is also the successor of the Base64 package. It provides a
+Base64 compatible interface, and an enhanced API. The latter is contained in
+the Netencoding module which also offers implementations of the "quoted
+printable", "Q", and "URL" encodings. Please see netencoding.mli for
+details.</p>
+
+ </sect2>
+
+
+ <sect2>
+ <title>The MIME scanner functions</title>
+
+ <p>In the Mimestring module you can find several functions scanning parts
+of MIME messages. These functions already cover most aspects of MIME messages:
+Scanning of headers, analysis of structured header entries, and scanning of
+multipart bodies. Of course, a full-featured MIME scanner would require some
+more functions, especially concrete parsers for frequent structures
+(mail addresses or date strings).
+</p>
+ <p>Please see the file mimestring.mli for details.</p>
+ </sect2>
+
+
+ <sect2>
+ <title>The HTML parser</title>
+
+ <p>The HTML parser should be able to read every HTML file; whether it is
+correct or not. The parser tries to recover from parsing errors as much as
+possible.
+</p>
+ <p>The parser returns the HTML term as conventional recursive value
+(i.e. no object-oriented design).</p>
+ <p>The parser depends a bit on knowledge about the HTML version; mainly
+because it needs to know the tags that are always empty. You may have to
+adjust this configuration before the parser works well enough for
+your purpose.
+</p>
+ <p>Please see the Nethtml module for details.</p>
+ </sect2>
+
+ <sect2>
+ <title>The abstract data type URL</title>
+ <p>The module Neturl contains support for URL parsing and processing.
+The implementation follows strictly the standards RFC 1738 and RFC 1808.
+URLs can be parsed, and several accessor functions allow the user to
+get components of parsed URLs, or to change components. Modifying URLs
+is safe; it is impossible to create a URL that does not have a valid
+string representation.</p>
+
+ <p>Both absolute and relative URLs are supported. It is possible to
+apply a relative URL to a base URL in order to get the corresponding
+absolute URL.</p>
+ </sect2>
+
+ <sect2>
+ <title>Conversion between character sets and encodings</title>
+ <p>The module Netconversion converts strings from one character set
+to another. It is Unicode-based, and there are conversion tables for more than
+50 encodings.</p>
+ </sect2>
+
+ </sect1>
+
+ <sect1>
+ <title>Author, Copying</title>
+ <p>
+<em>Netstring</em> has been written by &person.gps;. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>History</title>
+
+ <ul>
+ <li><p><em>Changed in 0.9.3:</em> Fixed a bug in the "install" rule of
+the Makefile.</p>
+ </li>
+ <li><p><em>Changed in 0.9.2:</em> New format for the conversion tables
+which are now much smaller.</p>
+ </li>
+ <li><p><em>Changed in 0.9.1:</em> Updated the Makefile such that
+(native-code) compilation of netmappings.ml becomes possible.
+</p>
+ </li>
+ <li><p><em>Changed in 0.9:</em> Extended Mimestring module: It can
+now process RFC-2047 messages.</p>
+ <p>New Netconversion module which converts strings between character
+encodings.</p>
+ </li>
+ <li><p><em>Changed in 0.8.1:</em> Added the component
+url_accepts_8bits to Neturl.url_syntax. This helps processing URLs which
+intentionally contain bytes >= 0x80.</p>
+ <p>Fixed a bug: Every URL containing a 'j' was malformed!</p>
+ </li>
+ <li><p><em>Changed in 0.8:</em> Added the module Neturl which
+provides the abstract data types of URLs.</p>
+ <p>The whole package is now thread-safe.</p>
+ <p>Added printers for the various opaque data types.</p>
+ <p>Added labels to function arguments where appropriate. The
+following functions changed their signatures significantly:
+Cgi.mk_memory_arg, Cgi.mk_file_arg.</p>
+ </li>
+ <li><p><em>Changed in 0.7:</em> Added workarounds for frequent
+browser bugs. Some functions now take an additional argument
+specifying which workarounds are enabled.</p>
+ </li>
+ <li><p><em>Changed in 0.6.1:</em> Updated URLs in documentation.</p>
+ </li>
+
+ <li><p><em>Changed in 0.6:</em> The file upload has been re-implemented
+to support large files; the file is now read block by block and the blocks can
+be collected either in memory or in a temporary file.<br/>
+Furthermore, the CGI API has been revised. There is now an opaque data type
+"argument" that hides all implementation details and that is extensible (if
+necessary, it is possible to add features without breaking the interface
+again).<br/>
+The CGI argument parser can be configured; currently it is possible to
+limit the size of uploaded data, to control by which method arguments are
+processed, and to set up where temporary files are created.<br/>
+The other parts of the package that have nothing to do with CGI remain
+unchanged.
+</p>
+ </li>
+
+ <li><p><em>Changed in 0.5.1:</em> A mistake in the documentation has
+been corrected.</p>
+ </li>
+
+ <li><p><em>Initial version 0.5:</em>
+The Netstring package wants to be the successor of the Base64-0.2 and
+the Cgi-0.3 packages. The sum of both numbers is 0.5, and because of this,
+the first version number is 0.5.
+</p>
+ </li>
+ </ul>
+ </sect1>
+
+</readme>
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Str = Netstring_str;;
+
+(* Matches one CR (code 13) or LF character. scan_header deletes every
+ * match of this expression when it unfolds a header value.
+ *)
+let cr_or_lf_re = Str.regexp "[\013\n]";;
+
+(* Matches a non-empty run of spaces and tabs in front of "$". scan_header
+ * uses it to cut off trailing white space of an unfolded value.
+ *)
+let trim_right_spaces_re =
+ Str.regexp "[ \t]+$";;
+
+(* Matches a non-empty run of spaces and tabs after "^". scan_header
+ * uses it to cut off leading white space of an unfolded value.
+ *)
+let trim_left_spaces_re =
+ Str.regexp "^[ \t]+";;
+
+(* Matches one complete header field.
+ * Group 1: the field name, i.e. one or more characters other than space,
+ * tab, CR, LF and colon. The name must be followed by a colon.
+ * Group 2: everything after the colon: the rest of the line including
+ * its LF, plus all directly following lines that begin with a space
+ * or a tab (continuation lines).
+ *)
+let header_re =
+ Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;
+
+(* Matches an empty line: an optional CR followed by LF. *)
+let empty_line_re =
+ Str.regexp "\013?\n";;
+
+(* Matches LF, optional CR, LF: the line end of the last header field
+ * directly followed by an empty line.
+ *)
+let end_of_header_re =
+ Str.regexp "\n\013?\n";;
+
+
+(* scan_header ?unfold parstr ~start_pos ~end_pos:
+ * Reads header fields from parstr beginning at start_pos until an empty
+ * line is found. Returns (fields, pos) where fields is the list of
+ * (name, value) pairs in the order of occurrence, and pos is the position
+ * directly after the empty line.
+ * - The names are converted to lowercase.
+ * - If unfold is true (the default), CR and LF characters are removed from
+ *   the values, and then trailing and leading spaces/tabs are cut off.
+ *   Otherwise the values are returned as found, including line ends.
+ * Fails with "Mimestring.scan_header" if a field ends after end_pos, or
+ * if the text at the current position is neither a field nor an empty line.
+ *)
+let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
+  let fail () = failwith "Mimestring.scan_header" in
+  let strip re str = Str.global_replace re "" str in
+  let unfold_value raw =
+    (* First drop the line ends, then the outer white space: *)
+    let joined = strip cr_or_lf_re raw in
+    let right_trimmed = strip trim_right_spaces_re joined in
+    strip trim_left_spaces_re right_trimmed
+  in
+  let rec loop pos fields =
+    match Str.string_partial_match header_re parstr pos with
+        None ->
+          (* No further field: the header must end with an empty line *)
+          ( match Str.string_partial_match empty_line_re parstr pos with
+                None -> fail ()
+              | Some m -> (List.rev fields, Str.match_end m)
+          )
+      | Some m ->
+          let next = Str.match_end m in
+          if next > i1 then
+            fail ()
+          else begin
+            let name = String.lowercase (Str.matched_group m 1 parstr) in
+            let raw = Str.matched_group m 2 parstr in
+            let value = if unfold then unfold_value raw else raw in
+            loop next ((name, value) :: fields)
+          end
+  in
+  loop i0 []
+;;
+
+(* The tokens produced by the scanner (see scan_next_token):
+ * - Atom s: a run of characters that ends before the next special
+ * character, white space, control character, double quote, "(" or "["
+ * - EncodedWord(charset, encoding, content): an atom of the form
+ * =?charset?encoding?content?= ; only produced if the option
+ * Recognize_encoded_words is set. charset and encoding are converted
+ * to uppercase.
+ * - QString s: a quoted string; s is the contents without the quotes
+ * - Control c: a character with code 0..31 or 127..255 that is neither
+ * special nor skipped as white space
+ * - Special c: a character from the list of specials of the scanner
+ * - DomainLiteral s: a text in square brackets; s is the contents
+ * without the brackets
+ * - Comment: a comment in parentheses; only produced if the option
+ * Return_comments is set
+ * - End: the end of the scanned string has been reached
+ *)
+type s_token =
+ Atom of string
+ | EncodedWord of (string * string * string)
+ | QString of string
+ | Control of char
+ | Special of char
+ | DomainLiteral of string
+ | Comment
+ | End
+;;
+
+(* Options of the scanner:
+ * - No_backslash_escaping: the backslash is not interpreted as escape
+ * character in quoted strings, domain literals and comments
+ * - Return_comments: comments are returned as Comment tokens
+ * - Recognize_encoded_words: atoms that match encoded_word_re are
+ * returned as EncodedWord tokens
+ *)
+type s_option =
+ No_backslash_escaping
+ | Return_comments
+ | Recognize_encoded_words
+;;
+
+(* A token together with the information where it was found. The values
+ * are created by mk_pair in scan_next_token from the state of the scanner
+ * at the beginning of the token.
+ *)
+type s_extended_token =
+ { token : s_token; (* The token itself *)
+ token_pos : int; (* Position of the token in the string *)
+ token_line : int; (* Line number of the token *)
+ token_linepos : int; (* Position of the beginning of the line *)
+ token_len : int; (* Length of the token in the string *)
+ mutable token_sep : bool; (* separates adjacent encoded words *)
+ }
+;;
+
+(* Accessor functions for extended tokens. The column is the distance
+ * of the token position from the beginning of its line.
+ *)
+let get_token { token = tok } = tok;;
+let get_pos { token_pos = pos } = pos;;
+let get_line { token_line = line } = line;;
+let get_column { token_pos = pos; token_linepos = bol } = pos - bol;;
+let get_length { token_len = len } = len;;
+let separates_adjacent_encoded_words { token_sep = flag } = flag;;
+
+(* get_decoded_word et: Returns the text represented by the token.
+ * - Atom, QString, DomainLiteral: the string of the token
+ * - Control, Special: the character as string of length 1
+ * - Comment: the empty string
+ * - EncodedWord: the content decoded with Netencoding.Q ("Q" or "q")
+ *   or with Netencoding.Base64 ("B" or "b")
+ * Fails with "get_decoded_word" for the End token and for encoded words
+ * with any other encoding.
+ *)
+let get_decoded_word et =
+  let fail () = failwith "get_decoded_word" in
+  let char_as_string ch = String.make 1 ch in
+  match et.token with
+      End -> fail ()
+    | Comment -> ""
+    | Atom text -> text
+    | QString text -> text
+    | DomainLiteral text -> text
+    | Control ch -> char_as_string ch
+    | Special ch -> char_as_string ch
+    | EncodedWord (_, enc, payload) ->
+        if enc = "Q" || enc = "q" then
+          Netencoding.Q.decode payload
+        else if enc = "B" || enc = "b" then
+          Netencoding.Base64.decode
+            ~url_variant:false
+            ~accept_spaces:false
+            payload
+        else
+          fail ()
+;;
+
+(* get_charset et: Returns the character set of the token: the charset
+ * component for encoded words, and "US-ASCII" for all other tokens.
+ * Fails with "get_charset" for the End token.
+ *)
+let get_charset et =
+  match et.token with
+      End -> failwith "get_charset"
+    | EncodedWord (cs, _, _) -> cs
+    | _ -> "US-ASCII"
+;;
+
+(* The configuration of a scanner. The record is created once by
+ * create_mime_scanner; the derived fields are computed there from
+ * scanner_specials and scanner_options.
+ *)
+type scanner_spec =
+ { (* What the user specifies: *)
+ scanner_specials : char list;
+ scanner_options : s_option list;
+ (* Derived from that: *)
+ (* One flag for every member of s_option: *)
+ mutable opt_no_backslash_escaping : bool;
+ mutable opt_return_comments : bool;
+ mutable opt_recognize_encoded_words : bool;
+
+ (* is_special.(k) is true iff the character with code k is in
+ * scanner_specials (256 entries); space_is_special is the entry
+ * for the space character (code 32).
+ *)
+ mutable is_special : bool array;
+ mutable space_is_special : bool;
+ }
+;;
+
+(* The mutable state of a scanner: the string being scanned, and the
+ * current position in it.
+ *)
+type scanner_target =
+ { scanned_string : string;
+ (* The string to scan *)
+ mutable scanner_pos : int;
+ (* Position of the next character to scan *)
+ mutable scanner_line : int;
+ (* Line number of scanner_pos *)
+ mutable scanner_linepos : int;
+ (* Position of the beginning of the line *)
+ mutable scanned_tokens : s_extended_token Queue.t;
+ (* A queue of already scanned tokens in order to look ahead *)
+ mutable last_token : s_token;
+ (* The last returned token. It is only important whether it is
+ * EncodedWord or not.
+ *)
+ }
+;;
+
+(* A scanner is the pair of the configuration and the scanning state.
+ * create_mime_scanner returns values of this type.
+ *)
+type mime_scanner = scanner_spec * scanner_target
+;;
+
+(* get_pos_of_scanner scn: Returns the current position of the scanner.
+ * Fails with "get_pos_of_scanner" if the scanner recognizes encoded words.
+ *)
+let get_pos_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+      true -> failwith "get_pos_of_scanner"
+    | false -> target.scanner_pos
+;;
+
+(* get_line_of_scanner scn: Returns the current line number of the scanner.
+ * Fails with "get_line_of_scanner" if the scanner recognizes encoded words.
+ *)
+let get_line_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+      true -> failwith "get_line_of_scanner"
+    | false -> target.scanner_line
+;;
+
+(* get_column_of_scanner scn: Returns the current column of the scanner,
+ * i.e. the distance of the current position from the beginning of the
+ * current line.
+ * Fails with "get_column_of_scanner" if the scanner recognizes encoded
+ * words.
+ *)
+let get_column_of_scanner (spec, target) =
+  match spec.opt_recognize_encoded_words with
+      true -> failwith "get_column_of_scanner"
+    | false -> target.scanner_pos - target.scanner_linepos
+;;
+
+(* create_mime_scanner ~specials ~scan_options ?pos ?line ?column s:
+ * Creates a scanner for the string s.
+ * - specials: the characters that are returned as Special tokens
+ * - scan_options: the list of enabled options
+ * - pos: the position where scanning begins (default 0)
+ * - line: the line number of this position (default 1)
+ * - column: the column of this position (default 0)
+ * The configuration record is built as soon as the two labeled arguments
+ * are passed; all scanners created from the same partial application
+ * share this record.
+ *)
+let create_mime_scanner ~specials ~scan_options =
+  let enabled opt = List.mem opt scan_options in
+  let table = Array.create 256 false in
+  let mark c = table.( Char.code c ) <- true in
+  List.iter mark specials;
+  let spec =
+    { scanner_specials = specials;
+      scanner_options = scan_options;
+      opt_no_backslash_escaping = enabled No_backslash_escaping;
+      opt_return_comments = enabled Return_comments;
+      opt_recognize_encoded_words = enabled Recognize_encoded_words;
+      is_special = table;
+      space_is_special = table.( Char.code ' ' );
+    }
+  in
+  (* Grab the remaining arguments: *)
+  fun ?(pos=0) ?(line=1) ?(column=0) s ->
+    ( spec,
+      { scanned_string = s;
+        scanner_pos = pos;
+        scanner_line = line;
+        scanner_linepos = pos - column;
+        scanned_tokens = Queue.create();
+        (* Must not be initialized with EncodedWord: *)
+        last_token = Comment;
+      }
+    )
+;;
+
+
+(* Matches an encoded word of the form =?charset?encoding?content?=
+ * Groups 1, 2 and 3 are the charset, the encoding and the content; each
+ * of them is a non-empty string without question marks.
+ *)
+let encoded_word_re =
+ Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;
+
+(* scan_next_token (spec,target):
+ * Scans the next token of target.scanned_string beginning at
+ * target.scanner_pos, and returns the pair (extended_token, token).
+ * The position, line number and line position of the target are
+ * advanced to the first character after the token. At the end of the
+ * string the token End is returned (again and again).
+ *
+ * The characters are classified in this order:
+ * - Characters that are special according to spec.is_special are returned
+ *   as Special tokens.
+ * - A double quote begins a quoted string (QString).
+ * - A left parenthesis begins a comment. Comments are returned as Comment
+ *   if opt_return_comments is set; otherwise as Special ' ' if the space
+ *   character is special; otherwise they are skipped.
+ * - Space, tab, CR and LF are skipped.
+ * - The other characters with codes 0..31 and 127..255 are returned as
+ *   Control tokens.
+ * - A left bracket begins a domain literal (DomainLiteral).
+ * - Everything else begins an atom (Atom or EncodedWord).
+ *
+ * Unless opt_no_backslash_escaping is set, a backslash in a quoted string,
+ * domain literal or comment protects the following character.
+ * Missing closing delimiters are tolerated; the token extends to the end
+ * of the string then.
+ *
+ * This function does not look at the token queue of the target; see
+ * scan_token for that.
+ *)
+let scan_next_token ((spec,target) as scn) =
+  let mk_pair t len =
+    { token = t;
+      token_pos = target.scanner_pos;
+      token_line = target.scanner_line;
+      token_linepos = target.scanner_linepos;
+      token_len = len;
+      token_sep = false;
+    },
+    t
+  in
+
+  (* Note: mk_pair creates a new token pair, and it assumes that
+   * target.scanner_pos (and also scanner_line and scanner_linepos)
+   * still contain the position of the beginning of the token.
+   *)
+
+  let s = target.scanned_string in
+  let l = String.length s in
+  let rec scan i =
+    (* i is always equal to target.scanner_pos here *)
+    if i < l then begin
+      let c = s.[i] in
+      if spec.is_special.( Char.code c ) then begin
+        let pair = mk_pair (Special c) 1 in
+        target.scanner_pos <- target.scanner_pos + 1;
+        (match c with
+             '\n' ->
+               target.scanner_line <- target.scanner_line + 1;
+               target.scanner_linepos <- target.scanner_pos;
+           | _ -> ()
+        );
+        pair
+      end
+      else
+        match c with
+            '"' ->
+              (* Quoted string: *)
+              scan_qstring (i+1) (i+1) 0
+          | '(' ->
+              (* Comment: *)
+              let i', line, linepos =
+                scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
+              in
+              (* The target is advanced after mk_pair because mk_pair
+               * needs the position of the beginning of the comment.
+               *)
+              let advance() =
+                target.scanner_pos <- i';
+                target.scanner_line <- line;
+                target.scanner_linepos <- linepos
+              in
+              if spec.opt_return_comments then begin
+                let pair = mk_pair Comment (i' - i) in
+                advance();
+                pair
+              end
+              else
+                if spec.space_is_special then begin
+                  let pair = mk_pair (Special ' ') (i' - i) in
+                  advance();
+                  pair
+                end
+                else begin
+                  advance();
+                  scan i'
+                end
+          | (' '|'\t'|'\r') ->
+              (* Ignore whitespace by default: *)
+              target.scanner_pos <- target.scanner_pos + 1;
+              scan (i+1)
+          | '\n' ->
+              (* Ignore whitespace by default: *)
+              target.scanner_pos <- target.scanner_pos + 1;
+              target.scanner_line <- target.scanner_line + 1;
+              target.scanner_linepos <- target.scanner_pos;
+              scan (i+1)
+          | ('\000'..'\031'|'\127'..'\255') ->
+              let pair = mk_pair (Control c) 1 in
+              target.scanner_pos <- target.scanner_pos + 1;
+              pair
+          | '[' ->
+              (* Domain literal: *)
+              scan_dliteral (i+1) (i+1) 0
+          | _ ->
+              scan_atom i i
+    end
+    else
+      mk_pair End 0
+
+  and scan_atom i0 i =
+    (* i0: the beginning of the atom; i: the current position *)
+    let return_atom() =
+      let astring = String.sub s i0 (i-i0) in
+      let r =
+        if spec.opt_recognize_encoded_words then
+          Str.string_match ~groups:4 encoded_word_re astring 0
+        else
+          None
+      in
+      match r with
+          None ->
+            (* An atom contains never a linefeed character, so we can ignore
+             * scanner_line here.
+             *)
+            let pair = mk_pair (Atom astring) (i-i0) in
+            target.scanner_pos <- i;
+            pair
+        | Some mr ->
+            (* Found an encoded word. *)
+            let charset = Str.matched_group mr 1 astring in
+            let encoding = Str.matched_group mr 2 astring in
+            let content = Str.matched_group mr 3 astring in
+            let t = EncodedWord(String.uppercase charset,
+                                String.uppercase encoding,
+                                content) in
+            let pair = mk_pair t (i-i0) in
+            target.scanner_pos <- i;
+            pair
+    in
+
+    if i < l then
+      let c = s.[i] in
+      match c with
+          ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
+            return_atom()
+        | _ ->
+            if spec.is_special.( Char.code c ) then
+              return_atom()
+            else
+              scan_atom i0 (i+1)
+    else
+      return_atom()
+
+  and scan_qstring i0 i n =
+    (* i0: the position after the opening quote; i: the current position;
+     * n: the number of characters of the contents found so far (an
+     * escaped character counts as one character)
+     *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+          '"' ->
+            (* Regular end of the quoted string: *)
+            let content, line, linepos = copy_qstring i0 (i-1) n in
+            let pair = mk_pair (QString content) (i-i0+2) in
+            target.scanner_pos <- i+1;
+            target.scanner_line <- line;
+            target.scanner_linepos <- linepos;
+            pair
+        | '\\' when not spec.opt_no_backslash_escaping ->
+            scan_qstring i0 (i+2) (n+1)
+        | _ ->
+            scan_qstring i0 (i+1) (n+1)
+    else
+      (* Missing right double quote *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (QString content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+  and copy_qstring i0 i1 n =
+    (* Used for quoted strings and for domain literals.
+     * Copies the characters at positions i0 to i1 into a new string of
+     * length n, and removes the escaping backslashes. Returns the new
+     * string, and the line number and line position after the copied
+     * region.
+     *
+     * esc is true iff the previous character was an escaping backslash.
+     * The character after such a backslash is always copied literally,
+     * even if it is a backslash itself. Without this state the second
+     * backslash of an escaped backslash would be taken as another escape
+     * character, and fewer than n characters would be copied.
+     *)
+    let r = String.create n in
+    let k = ref 0 in
+    let line = ref target.scanner_line in
+    let linepos = ref target.scanner_linepos in
+    let esc = ref false in
+    for i = i0 to i1 do
+      let c = s.[i] in
+      match c with
+          '\\' when i < i1 && not spec.opt_no_backslash_escaping
+                   && not !esc ->
+            esc := true
+        | '\n' ->
+            line := !line + 1;
+            linepos := i+1;
+            r.[ !k ] <- c;
+            incr k;
+            esc := false
+        | _ ->
+            r.[ !k ] <- c;
+            incr k;
+            esc := false
+    done;
+    assert (!k = n);
+    r, !line, !linepos
+
+  and scan_dliteral i0 i n =
+    (* The arguments have the same meaning as for scan_qstring *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+          ']' ->
+            (* Regular end of the domain literal: *)
+            let content, line, linepos = copy_qstring i0 (i-1) n in
+            let pair = mk_pair (DomainLiteral content) (i-i0+2) in
+            target.scanner_pos <- i+1;
+            target.scanner_line <- line;
+            target.scanner_linepos <- linepos;
+            pair
+        | '\\' when not spec.opt_no_backslash_escaping ->
+            scan_dliteral i0 (i+2) (n+1)
+        | _ ->
+            (* Note: '[' is not allowed by RFC 822; we treat it here as
+             * a regular character (questionable)
+             *)
+            scan_dliteral i0 (i+1) (n+1)
+    else
+      (* Missing right bracket *)
+      let content, line, linepos = copy_qstring i0 (l-1) n in
+      let pair = mk_pair (DomainLiteral content) (l-i0+1) in
+      target.scanner_pos <- l;
+      target.scanner_line <- line;
+      target.scanner_linepos <- linepos;
+      pair
+
+
+  and scan_comment i level line linepos =
+    (* Skips a comment. i: the current position (the first call passes the
+     * position after the left parenthesis); level: the nesting depth;
+     * line, linepos: the line number and line position at i.
+     * Returns the position after the comment, and the line number and
+     * line position there.
+     *)
+    if i < l then
+      let c = s.[i] in
+      match c with
+          ')' ->
+            (i+1), line, linepos
+        | '(' ->
+            (* nested comment *)
+            let i', line', linepos' =
+              scan_comment (i+1) (level+1) line linepos
+            in
+            scan_comment i' level line' linepos'
+        | '\\' when not spec.opt_no_backslash_escaping ->
+            if (i+1) < l && s.[i+1] = '\n' then
+              scan_comment (i+2) level (line+1) (i+2)
+            else
+              scan_comment (i+2) level line linepos
+        | '\n' ->
+            scan_comment (i+1) level (line+1) (i+1)
+        | _ ->
+            scan_comment (i+1) level line linepos
+    else
+      (* Missing closing ')'. i may be l+1 if the last character of the
+       * string is a backslash; the returned position must not be bigger
+       * than l because it becomes the new scanner position.
+       *)
+      (min i l), line, linepos
+  in
+
+  scan target.scanner_pos
+;;
+
+
+(* scan_token (spec,target):
+ * Returns the next token as pair (extended_token, token), and stores the
+ * token in target.last_token.
+ * - If the queue target.scanned_tokens is not empty, the first queued
+ * token is returned.
+ * - Otherwise, if the last returned token was not an EncodedWord, the
+ * next token is scanned by scan_next_token and returned.
+ * - Otherwise the following white space tokens and the token after them
+ * are scanned in advance. If the latter is an EncodedWord, too, the
+ * white space tokens get the flag token_sep. The first of the scanned
+ * tokens is returned, and the others are appended to the queue.
+ *)
+let scan_token ((spec,target) as scn) =
+ (* This function handles token queueing in order to recognize white space
+ * that separates adjacent encoded words.
+ *)
+
+ let rec collect_whitespace () =
+ (* Scans whitespace tokens and returns them as:
+ * (ws_list, other_tok) if there is some other_tok following the
+ * list (other_tok = End is possible)
+ *)
+ let (et, t) as pair = scan_next_token scn in
+ ( match t with
+ (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
+ let ws_list, tok = collect_whitespace() in
+ pair :: ws_list, tok
+ | _ ->
+ [], pair
+ )
+ in
+
+ try
+ (* Is there an already scanned token in the queue? *)
+ let et = Queue.take target.scanned_tokens in
+ let t = et.token in
+ target.last_token <- t;
+ et, et.token
+ with
+ Queue.Empty ->
+ (* If not: inspect the last token. If that token is an EncodedWord,
+ * the next tokens are scanned in advance to determine if there
+ * are spaces separating two EncodedWords. These tokens are put
+ * into the queue such that it is avoided that they are scanned
+ * twice. (The sole purpose of the queue.)
+ *)
+ match target.last_token with
+ EncodedWord(_,_,_) as ew ->
+ let ws_list, tok = collect_whitespace() in
+ (* If tok is an EncodedWord, too, the tokens in ws_list must
+ * be flagged as separating two adjacent encoded words.
+ *)
+ ( match tok with
+ _, EncodedWord(_,_,_) ->
+ List.iter
+ (fun (et,t) ->
+ et.token_sep <- true)
+ ws_list
+ | _ ->
+ ()
+ );
+ (* Anyway, queue the read tokens but the first up *)
+ ( match ws_list with
+ [] ->
+ (* Nothing to queue *)
+ let et, t = tok in
+ target.last_token <- t;
+ tok
+ | (et,t) as pair :: ws_list' ->
+ List.iter
+ (fun (et',_) ->
+ Queue.add et' target.scanned_tokens)
+ ws_list';
+ (* The End token is not queued; the branches here that call
+ * scan_next_token do so only when the queue is empty.
+ *)
+ ( match tok with
+ | _, End ->
+ ()
+ | (et',_) ->
+ Queue.add et' target.scanned_tokens
+ );
+ (* Return the first scanned token *)
+ target.last_token <- t;
+ pair
+ )
+ | _ ->
+ (* Regular case: Scan one token; do not queue it up *)
+ let (et, t) as pair = scan_next_token scn in
+ target.last_token <- t;
+ pair
+;;
+
+
+(* scan_token_list scn: Scans all remaining tokens of the scanner, and
+ * returns them as list of (extended_token, token) pairs in the order of
+ * occurrence. The End token is not included.
+ *)
+let scan_token_list scn =
+  let rec gather acc =
+    match scan_token scn with
+        (_, End) -> List.rev acc
+      | pair -> gather (pair :: acc)
+  in
+  gather []
+;;
+
+
+(* scan_structured_value s specials options: Creates a scanner for s with
+ * the passed list of special characters and the passed options, and
+ * returns the list of all tokens of s in the order of occurrence. The End
+ * token is not included.
+ *)
+let scan_structured_value s specials options =
+  let scn = create_mime_scanner specials options s in
+  let rec gather acc =
+    match scan_token scn with
+        (_, End) -> List.rev acc
+      | (_, tok) -> gather (tok :: acc)
+  in
+  gather []
+;;
+
+
+(* A predefined list of special characters. The value has the type of
+ * the "specials" argument of create_mime_scanner and
+ * scan_structured_value.
+ * NOTE(review): the name suggests the specials of RFC 822 that the
+ * scanner does not handle itself - confirm.
+ *)
+let specials_rfc822 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;
+
+
+(* The same list as specials_rfc822, but with '/' instead of '.'.
+ * NOTE(review): presumably the corresponding "tspecials" of RFC 2045 -
+ * confirm.
+ *)
+let specials_rfc2045 =
+ [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;
+
+
+(* scan_encoded_text_value s: Scans s as unstructured text that may
+ * contain encoded words. Space, tab, CR, LF, the left parenthesis, the
+ * left bracket and the double quote are special, so neither quoted
+ * strings, nor comments, nor domain literals are recognized.
+ *
+ * Returns the list of extended tokens in the order of occurrence.
+ * The list contains Special, Atom, EncodedWord and Control tokens;
+ * tokens that only separate two adjacent encoded words are left out.
+ *
+ * Control tokens are returned for all characters with codes 0..31 and
+ * 127..255 that are not special (for example unencoded 8 bit characters).
+ * Such characters must not lead to an assertion failure.
+ *)
+let scan_encoded_text_value s =
+  let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
+  let options = [ Recognize_encoded_words ] in
+  let scn = create_mime_scanner specials options s in
+
+  let rec collect () =
+    match scan_token scn with
+        _, End ->
+          []
+      | et, _ when separates_adjacent_encoded_words et ->
+          collect()
+      | et, (Special _|Atom _|EncodedWord(_,_,_)|Control _) ->
+          et :: collect ()
+      | _, _ ->
+          (* QString, DomainLiteral and Comment cannot occur because the
+           * characters beginning these tokens are special
+           *)
+          assert false
+  in
+  collect()
+;;
+
+
+(* scan_value_with_parameters s options: Scans values of the form
+ *
+ *   main_value ; name1 = value1 ; name2 = value2 ...
+ *
+ * and returns (main_value, [ name1, value1; name2, value2; ... ]).
+ * The main value and the parameter values may be atoms or quoted strings;
+ * the parameter names must be atoms. The passed options are the options
+ * of the scanner.
+ * Fails with "Mimestring.scan_value_with_parameters" if s does not have
+ * this form.
+ *)
+let scan_value_with_parameters s options =
+  let fail () = failwith "Mimestring.scan_value_with_parameters" in
+  (* The string of an atom or of a quoted string: *)
+  let word = function
+      Atom w -> Some w
+    | QString w -> Some w
+    | _ -> None
+  in
+  let rec params toks =
+    (* toks must begin with a parameter *)
+    match toks with
+        Atom name :: Special '=' :: v :: rest ->
+          ( match word v with
+                Some value -> (name, value) :: after_param rest
+              | None -> fail ()
+          )
+      | _ ->
+          fail ()
+  and after_param toks =
+    (* toks must be empty, or begin with a semicolon and a parameter *)
+    match toks with
+        [] -> []
+      | Special ';' :: rest -> params rest
+      | _ -> fail ()
+  in
+
+  (* Note: Even if not used here, the comma is a very common separator
+   * and should be recognized as being special. You will get a
+   * failure if there is a comma in the scanned string.
+   *)
+  match scan_structured_value s [ ';'; '='; ',' ] options with
+      [] ->
+        fail ()
+    | first :: rest ->
+        ( match (word first, rest) with
+              (Some main, []) -> (main, [])
+            | (Some main, Special ';' :: more) -> (main, params more)
+            | _ -> fail ()
+        )
+;;
+
+
+(* scan_mime_type s options: Like scan_value_with_parameters, but the
+ * main value and the names of the parameters are converted to lowercase.
+ * The values of the parameters are returned unchanged.
+ *)
+let scan_mime_type s options =
+  let lower_name (name, v) = (String.lowercase name, v) in
+  let (main, raw_params) = scan_value_with_parameters s options in
+  (String.lowercase main, List.map lower_name raw_params)
+;;
+
+
+(* Matches one LF character *)
+let lf_re = Str.regexp "[\n]";;
+
+(* scan_multipart_body s ~start_pos ~end_pos ~boundary:
+ * Scans the region of s from start_pos to end_pos as body of a multipart
+ * message whose parts are separated by lines beginning with "--" followed
+ * by the boundary string.
+ * Returns the list of the parts in the order of occurrence. Every part is
+ * a pair (header, value): header is the header of the part as returned by
+ * scan_header, and value is the body of the part. The line end in front
+ * of the next boundary line (LF or CR LF) does not belong to the value.
+ * The text before the first and after the last boundary line is ignored.
+ *
+ * A part may consist of a header only such that the empty line after the
+ * header is directly followed by the next boundary line. The value of
+ * such a part is the empty string.
+ *
+ * Raises Invalid_argument if start_pos or end_pos are not positions of s.
+ * Failures of scan_header are not caught.
+ *)
+let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let l_s = String.length s in
+  if i0 < 0 || i1 < 0 || i0 > l_s || i1 > l_s then
+    invalid_arg "Mimestring.scan_multipart_body";
+
+  (* First compile the regexps scanning for 'boundary': *)
+  let boundary1_re =
+    Str.regexp ("\n--" ^ Str.quote boundary) in
+  let boundary2_re =
+    Str.regexp ("--" ^ Str.quote boundary) in
+
+  let rec parse i =
+    (* i: Beginning of the current part (position directly after the
+     * boundary line
+     *)
+    (* Search for next boundary at position i *)
+    let i' =
+      try min (fst (Str.search_forward boundary1_re s i) + 1) i1
+      with
+          Not_found -> i1
+    in
+    (* i': Either the position of the first '-' of the boundary line,
+     * or i1 if no boundary has been found
+     *)
+    if i' >= i1 then
+      [] (* Ignore everything after the last boundary *)
+    else
+      let i'' =
+        try min (fst (Str.search_forward lf_re s i') + 1) i1
+        with
+            Not_found -> i1
+      in
+      (* i'': The position after the boundary line *)
+      let header, k = scan_header s i i' in
+      (* header: the header of the part
+       * k: beginning of the body
+       *)
+
+      let value =
+        (* We know that i'-1 is a linefeed character. i'-2 should be a CR
+         * character. Both characters are not part of the value.
+         *)
+        let body_end =
+          if i' >= 2 && s.[i'-2] = '\013' then i'-2 else i'-1 in
+        (* If the line end in front of the boundary line has already been
+         * consumed as the empty line after the header, k is bigger than
+         * body_end. The body is empty in this case; String.sub must not
+         * be called with a negative length.
+         *)
+        String.sub s k (max 0 (body_end - k))
+      in
+
+      let pair =
+        (header, value) in
+
+      if i'' >= i1
+      then
+        [ pair ]
+      else
+        pair :: parse i''
+  in
+
+  (* Find the first boundary. This is a special case, because it may be
+   * right at the beginning of the string (no preceding CRLF)
+   *)
+
+  let i_bnd =
+    if Str.string_partial_match boundary2_re s i0 <> None then
+      i0
+    else
+      try min (fst (Str.search_forward boundary1_re s i0)) i1
+      with
+          Not_found -> i1
+  in
+
+  if i_bnd >= i1 then
+    []
+  else
+    (* i_bnd': The position after the first boundary line *)
+    let i_bnd' =
+      try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
+      with
+          Not_found -> i1
+    in
+    if i_bnd' >= i1 then
+      []
+    else
+      parse i_bnd'
+;;
+
+
+(* scan_multipart_body_and_decode s ~start_pos ~end_pos ~boundary:
+ * Like scan_multipart_body, but the values of the parts are decoded
+ * according to the header field "content-transfer-encoding" of the part:
+ * - "7bit", "8bit", "binary", or no such field: the value is returned
+ *   unchanged
+ * - "base64": the value is decoded by Netencoding.Base64
+ * - "quoted-printable": the value is decoded by
+ *   Netencoding.QuotedPrintable
+ * The name of the encoding is compared case-insensitively because
+ * RFC 2045 defines these names as case-insensitive ("Base64" and "BASE64"
+ * are as valid as "base64").
+ * Fails if the encoding is unknown.
+ *)
+let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
+  let parts = scan_multipart_body s i0 i1 boundary in
+  List.map
+    (fun (params, value) ->
+       let encoding =
+         try List.assoc "content-transfer-encoding" params
+         with Not_found -> "7bit"
+       in
+
+       (* NOTE: In the case of "base64" and "quoted-printable", the allocation
+        * of the string "value" could be avoided.
+        *)
+
+       let value' =
+         match String.lowercase encoding with
+             ("7bit"|"8bit"|"binary") -> value
+           | "base64" ->
+               Netencoding.Base64.decode_substring
+                 value 0 (String.length value) false true
+           | "quoted-printable" ->
+               Netencoding.QuotedPrintable.decode_substring
+                 value 0 (String.length value)
+           | _ ->
+               failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
+       in
+       (params, value')
+    )
+    parts
+;;
+
+
+let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =
+
+ (* The block size of s must be at least the length of the boundary + 3.
+ * Otherwise it is not guaranteed that the boundary is always recognized.
+ *)
+ if Netstream.block_size s < String.length boundary + 3 then
+ invalid_arg "Mimestring.scan_multipart_body_from_netstream";
+
+ (* First compile the regexps scanning for 'boundary': *)
+ let boundary1_re =
+ Str.regexp ("\n--" ^ Str.quote boundary) in
+ let boundary2_re =
+ Str.regexp ("--" ^ Str.quote boundary) in
+
+ (* Subtask 1: Search the end of the MIME header: CR LF CR LF
+ * (or LF LF). Enlarge the window until the complete header
+ * is covered by the window.
+ *)
+ let rec search_end_of_header k =
+ (* Search the end of the header beginning at position k of the
+ * current window.
+ * Return the position of the first character of the body.
+ *)
+ try
+ (* Search for LF CR? LF: *)
+ let i, r = Str.search_forward
+ end_of_header_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ k
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then
+ Str.match_end r
+ else
+ raise Not_found
+ with
+ Not_found ->
+ (* If the end of the stream is reached, the end of the header
+ * is missing: Error.
+ * Otherwise, we try to read another block, and continue.
+ *)
+ if Netstream.at_eos s then
+ failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
+ let w0 = Netstream.window_length s in
+ Netstream.want_another_block s;
+ search_end_of_header (max (w0 - 2) 0)
+ in
+
+ (* Subtask 2: Search the first boundary line. *)
+ let rec search_first_boundary() =
+ (* Search boundary per regexp; return the position of the character
+ * immediately following the boundary (on the same line), or
+ * raise Not_found.
+ *)
+ try
+ (* Search boundary per regexp: *)
+ let i, r = Str.search_forward
+ boundary1_re
+ (Netbuffer.unsafe_buffer (Netstream.window s))
+ 0
+ in
+ (* If match_end <= window_length, the search was successful.
+ * Otherwise, we searched in the uninitialized region of the
+ * buffer.
+ *)
+ if Str.match_end r <= Netstream.window_length s then begin
+ Str.match_end r
+ end
+ else raise Not_found
+ with
+ Not_found ->
+ if Netstream.at_eos s then raise Not_found;
+ (* The regexp did not match: Move the window by one block.
+ *)
+ let n =
+ min
+ (Netstream.window_length s)
+ (Netstream.block_size s)
+ in
+ Netstream.move s n;
+ search_first_boundary()
+ in
+
+ (* Subtask 3: Search the next boundary line. Invoke 'add' for every
+ * read chunk
+ *)
+ let rec search_next_boundary p =
+   (* Scans the current window of s for the next match of boundary1_re.
+    * All data in front of the match is handed to the callback as
+    * 'add p s 0 n' (offset 0 of the window, n bytes), where p is the
+    * value of the part that is currently being read.
+    *
+    * Returns the position directly after the boundary on the same line
+    * (a position within the current window).
+    *
+    * Fails with Failure if the stream ends before a boundary is found.
+    * Exceptions raised by 'add' are propagated unchanged: Only the
+    * search itself is protected by the Not_found handler. Otherwise a
+    * Not_found coming out of the callback would be mistaken for "no
+    * boundary in this window", and the same data would be added again.
+    *)
+   let hit =
+     try
+       (* Search boundary per regexp: *)
+       let i, r =
+         Str.search_forward
+           boundary1_re
+           (Netbuffer.unsafe_buffer (Netstream.window s))
+           0
+       in
+       (* If match_end <= window_length, the search was successful.
+        * Otherwise, we searched in the uninitialized region of the
+        * buffer.
+        *)
+       let k_end = Str.match_end r in
+       if k_end <= Netstream.window_length s then Some (i, k_end) else None
+     with
+       Not_found -> None
+   in
+   match hit with
+     Some (i, k_end) ->
+       (* Add the last chunk of the part.
+        * i is a LF. i - 1 should be CR. Ignore these characters.
+        *)
+       let n =
+         if i >= 1 &&
+            (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] = '\013'
+         then i - 1
+         else i
+       in
+       add p s 0 n;
+       k_end
+   | None ->
+       if Netstream.at_eos s then
+         failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
+       (* The regexp did not match: Add the first block of the window;
+        * and move the window.
+        *)
+       let n =
+         min
+           (Netstream.window_length s)
+           (Netstream.block_size s)
+       in
+       add p s 0 n;
+       Netstream.move s n;
+       search_next_boundary p
+ in
+
+ (* Subtask 4: Search the end of the boundary line *)
+ let rec search_end_of_line k =
+   (* Looks for the next LF in the current window of s, starting at
+    * window position k, and returns the position directly after it.
+    * Everything in front of that LF is discarded: if the window contains
+    * no LF, the whole window is skipped and the search restarts at
+    * position 0 of the following window.
+    * Fails with Failure if the stream ends before a LF is found.
+    *)
+   let hit =
+     try
+       let _, r =
+         Str.search_forward
+           lf_re
+           (Netbuffer.unsafe_buffer (Netstream.window s))
+           k
+       in
+       (* A match that ends beyond window_length lies in the
+        * uninitialized region of the buffer and does not count.
+        *)
+       let k_end = Str.match_end r in
+       if k_end <= Netstream.window_length s then Some k_end else None
+     with
+       Not_found -> None
+   in
+   match hit with
+     Some k_end ->
+       k_end
+   | None ->
+       if Netstream.at_eos s then
+         failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
+       (* No LF in this window: drop the window completely. *)
+       Netstream.move s (Netstream.window_length s);
+       search_end_of_line 0
+ in
+
+ (* Subtask 5: Check whether "--" follows the boundary on the same line *)
+ let check_whether_last_boundary k =
+   (* k: The position directly after the boundary (within the window).
+    * Returns true iff the two characters at k and k+1 are available in
+    * the window and are both '-'.
+    *)
+   let needed = k + 2 in
+   (* Ask the stream for enough data before looking at the buffer: *)
+   Netstream.want s needed;
+   let window_buf = Netbuffer.unsafe_buffer (Netstream.window s) in
+   if Netstream.window_length s < needed then
+     false
+   else
+     window_buf.[k] = '-' && window_buf.[k+1] = '-'
+ in
+
+ (* Subtask 6: Check whether the buffer begins with a boundary. *)
+ let check_beginning_is_boundary () =
+   (* Returns true iff the window holds at least as many bytes as the
+    * boundary string plus 2, and boundary2_re matches at position 0 of
+    * the window.
+    *)
+   let needed = 2 + String.length boundary in
+   (* Ask the stream for enough data before looking at the buffer: *)
+   Netstream.want s needed;
+   let window_buf = Netbuffer.unsafe_buffer (Netstream.window s) in
+   if Netstream.window_length s < needed then
+     false
+   else
+     match Str.string_partial_match boundary2_re window_buf 0 with
+       None -> false
+     | Some _ -> true
+ in
+
+ let rec parse_part () =
+ (* Parses one part of the multipart body, and then recursively the
+ * following parts until the last boundary has been seen:
+ * - the header of the part is scanned and passed to 'create'
+ * - the contents are passed chunk by chunk to 'add' (this is done
+ * by search_next_boundary)
+ * - 'stop' is called for the part, also if an exception occurs
+ * after 'create' has returned
+ *
+ * The first byte of the current window of s contains the character
+ * directly following the boundary line that starts this part.
+ *)
+ (* Search the end of the MIME header: *)
+ let k_eoh = search_end_of_header 0 in
+ (* Printf.printf "k_eoh=%d\n" k_eoh; *)
+ (* Get the MIME header: *)
+ let str = Netbuffer.unsafe_buffer (Netstream.window s) in
+ let header, k_eoh' = scan_header str 0 k_eoh in
+ (* Both ways of finding the end of the header must agree: *)
+ assert (k_eoh = k_eoh');
+ (* Move the window over the header: *)
+ Netstream.move s k_eoh;
+ (* Create the part: *)
+ let p = create header in
+ (* continue: whether another part follows after this one *)
+ let continue =
+ begin try
+ (* Search the next boundary; add the chunks while searching: *)
+ let k_eob = search_next_boundary p in
+ (* Printf.printf "k_eob=%d\n" k_eob; *)
+ (* Is this the last boundary? *)
+ if check_whether_last_boundary k_eob then begin
+ (* Skip the rest: *)
+ while not (Netstream.at_eos s) do
+ Netstream.move s (Netstream.window_length s)
+ done;
+ (* At the end of the stream: drop what is still in the window *)
+ Netstream.move s (Netstream.window_length s);
+ false
+ end
+ else begin
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line k_eob in
+ Netstream.move s k_eol;
+ true
+ end
+ with
+ any ->
+ (* Give the caller the chance to clean up. An exception raised by
+ * this 'stop' call is dropped in favour of the original one.
+ *)
+ (try stop p with _ -> ());
+ raise any
+ end in
+ (* Regular end of the part: *)
+ stop p;
+ if continue then
+ (* Continue with next part: *)
+ parse_part()
+ in
+
+ (* Check whether s directly begins with a boundary: *)
+ if check_beginning_is_boundary() then begin
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line 0 in
+ Netstream.move s k_eol;
+ (* Begin with first part: *)
+ parse_part()
+ end
+ else begin
+ (* Search the first boundary: *)
+ try
+ let k_eob = search_first_boundary() in
+ (* Printf.printf "k_eob=%d\n" k_eob; *)
+ (* Move to the beginning of the next line: *)
+ let k_eol = search_end_of_line k_eob in
+ (* Printf.printf "k_eol=%d\n" k_eol; *)
+ Netstream.move s k_eol;
+ (* Begin with first part: *)
+ parse_part()
+ with
+ Not_found ->
+ (* No boundary at all: The body is empty. *)
+ ()
+ end;
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/13 00:04:36 gerd
+ * Encoded_word -> EncodedWord
+ * Bugfixes.
+ *
+ * Revision 1.7 2000/08/07 00:25:14 gerd
+ * Implemented the new functions for structured field lexing.
+ *
+ * Revision 1.6 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.5 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.4 2000/05/16 22:30:14 gerd
+ * Added support for some types of malformed MIME messages.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Collection of auxiliary functions to parse MIME headers *)
+(**********************************************************************)
+
+
+val scan_header :
+ ?unfold:bool ->
+ string -> start_pos:int -> end_pos:int ->
+ ((string * string) list * int)
+ (* let params, i2 = scan_header s i0 i1:
+ *
+ * DESCRIPTION
+ *
+ * Scans the MIME header that begins at position i0 in the string s
+ * and that must end somewhere before position i1. It is intended
+ * that in i1 the character position following the end of the body of the
+ * MIME message is passed.
+ * Returns the parameters of the header as (name,value) pairs (in
+ * params), and in i2 the position of the character following
+ * directly after the header (i.e. after the blank line separating
+ * the header from the body).
+ * The following normalizations have already been applied:
+ * - The names are all in lowercase
+ * - Newline characters (CR and LF) have been removed (unless
+ * ?unfold:false has been passed)
+ * - Whitespace at the beginning and at the end of values has been
+ * removed (unless ?unfold:false is specified)
+ * The rules of RFC 2047 have NOT been applied.
+ * The function fails if the header violates the header format
+ * strongly. (Some minor deviations are tolerated, e.g. it is sufficient
+ * to separate lines by only LF instead of CRLF.)
+ *
+ * OPTIONS:
+ *
+ * unfold: If true (the default), folded lines are concatenated and
+ * returned as one line. This means that CR and LF characters are
+ * deleted and that whitespace at the beginning and the end of the
+ * string is removed.
+ * You may set ?unfold:false to locate individual characters in the
+ * parameter value exactly.
+ *
+ * ABOUT MIME MESSAGE FORMAT:
+ *
+ * This is the modern name for messages in "E-Mail format". Messages
+ * consist of a header and a body; the first empty line separates both
+ * parts. The header contains lines "param-name: param-value" where
+ * the param-name must begin on column 0 of the line, and the ":"
+ * separates the name and the value. So the format is roughly:
+ *
+ * param1-name: param1-value
+ * ...
+ * paramN-name: paramN-value
+ *
+ * body
+ *
+ * This function wants in i0 the position of the first character of
+ * param1-name in the string, and in i1 the position of the character
+ * following the body. It returns as i2 the position where the body
+ * begins. Furthermore, in 'params' all parameters are returned that
+ * exist in the header.
+ *
+ * DETAILS
+ *
+ * Note that parameter values are restricted; you cannot represent
+ * arbitrary strings. The following problems can arise:
+ * - Values cannot begin with whitespace characters, because there
+ * may be an arbitrary number of whitespaces between the ':' and the
+ * value.
+ * - Values (and names of parameters, too) must only be formed of
+ * 7 bit ASCII characters. (If this is not enough, the MIME standard
+ * knows the extension RFC 2047 that allows that header values may
+ * be composed of arbitrary characters of arbitrary character sets.)
+ * - Header values may be broken into several lines, the continuation
+ * lines must begin with whitespace characters. This means that values
+ * must not contain line breaks as a semantic part of the value.
+ * And it may mean that ONE whitespace character is not distinguishable
+ * from SEVERAL whitespace characters.
+ * - Header lines must not be longer than 76 characters. Values that
+ * would result in longer lines must be broken into several lines.
+ * This means that you cannot represent strings that contain too few
+ * whitespace characters.
+ * - Some gateways pad the lines with spaces at the end of the lines.
+ *
+ * This implementation of a MIME scanner tolerates a number of
+ * deviations from the standard: long lines are not rejected; 8 bit
+ * values are accepted; lines may be ended only with LF instead of
+ * CRLF.
+ * Furthermore, header values are transformed:
+ * - leading and trailing spaces are always removed
+ * - CRs and LFs are deleted; it is guaranteed that there is at least
+ * one space or tab where CR/LFs are deleted.
+ * Last but not least, the names of the header values are converted
+ * to lowercase; MIME specifies that they are case-independent.
+ *
+ * COMPATIBILITY WITH THE STANDARD
+ *
+ * This function can parse all MIME headers that conform to RFC 822.
+ * But there may be still problems, as RFC 822 allows some crazy
+ * representations that are actually not used in practice.
+ * In particular, RFC 822 allows it to use backslashes to "indicate"
+ * that a CRLF sequence is semantically meant as line break. As this
+ * function normally deletes CRLFs, it is not possible to recognize such
+ * indicators in the result of the function.
+ *)
+
+(**********************************************************************)
+
+(* The following types and functions allow it to build scanners for
+ * structured MIME values in a highly configurable way.
+ *
+ * WHAT ARE STRUCTURED VALUES?
+ *
+ * RFC 822 (together with some other RFCs) defines lexical rules
+ * how formal MIME header values should be divided up into tokens. Formal
+ * MIME headers are those headers that are formed according to some
+ * grammar, e.g. mail addresses or MIME types.
+ * Some of the characters separate phrases of the value; these are
+ * the "special" characters. For example, '@' is normally a special
+ * character for mail addresses, because it separates the user name
+ * from the domain name. RFC 822 defines a fixed set of special
+ * characters, but other RFCs use different sets. Because of this,
+ * the following functions allow it to configure the set of special characters.
+ * Every sequence of characters may be embraced by double quotes,
+ * which means that the sequence is meant as literal data item;
+ * special characters are not recognized inside a quoted string. You may
+ * use the backslash to insert any character (including double quotes)
+ * verbatim into the quoted string (e.g. "He said: \"Give it to me!\"").
+ * The sequence of a backslash character and another character is called
+ * a quoted pair.
+ * Structured values may contain comments. The beginning of a comment
+ * is indicated by '(', and the end by ')'. Comments may be nested.
+ * Comments may contain quoted pairs. A
+ * comment counts as if a space character were written instead of it.
+ * Control characters are the ASCII characters 0 to 31, and 127.
+ * RFC 822 demands that MIME headers are 7 bit ASCII strings. Because
+ * of this, this function also counts the characters 128 to 255 as
+ * control characters.
+ * Domain literals are strings embraced by '[' and ']'; such literals
+ * may contain quoted pairs. Today, domain literals are used to specify
+ * IP addresses.
+ * Every character sequence not falling in one of the above categories
+ * is an atom (a sequence of non-special and non-control characters).
+ * When recognized, atoms may be encoded in a character set different than
+ * US-ASCII; such atoms are called encoded words (see RFC 2047).
+ *
+ * EXTENDED INTERFACE:
+ *
+ * In order to scan a string containing a MIME value, you must first
+ * create a mime_scanner using the function create_mime_scanner.
+ * The scanner contains the reference to the scanned string, and a
+ * specification how the string is to be scanned. The specification
+ * consists of the lists 'specials' and 'scan_options'.
+ *
+ * The character list 'specials' specifies the set of special characters.
+ * These characters are returned as Special c tokens; the following additional
+ * rules apply:
+ *
+ * - Spaces:
+ * If ' ' in specials: A space character is returned as Special ' '.
+ * Note that there may also be an effect on how comments are returned
+ * (see below).
+ * If ' ' not in specials: Spaces are ignored.
+ *
+ * - Tabs, CRs, LFs:
+ * If '\t' in specials: A tab character is returned as Special '\t'.
+ * If '\t' not in specials: Tabs are ignored.
+ *
+ * If '\r' in specials: A CR character is returned as Special '\r'.
+ * If '\r' not in specials: CRs are ignored.
+ *
+ * If '\n' in specials: A LF character is returned as Special '\n'.
+ * If '\n' not in specials: LFs are ignored.
+ *
+ * - Comments:
+ * If '(' in specials: Comments are not recognized. The character '('
+ * is returned as Special '('.
+ * If '(' not in specials: Comments are recognized. How comments are
+ * returned, depends on the following:
+ * If Return_comments in scan_options: Outer comments are returned as
+ * Comment (note that inner comments count but
+ * are not returned as tokens)
+ * If otherwise ' ' in specials: Outer comments are returned as
+ * Special ' '
+ * Otherwise: Comments are recognized but ignored.
+ *
+ * - Quoted strings:
+ * If '"' in specials: Quoted strings are not recognized, and double quotes
+ * are returned as Special '"'.
+ * If '"' not in specials: Quoted strings are returned as QString tokens.
+ *
+ * - Domain literals:
+ * If '[' in specials: Domain literals are not recognized, and left brackets
+ * are returned as Special '['.
+ * If '[' not in specials: Domain literals are returned as DomainLiteral
+ * tokens.
+ *
+ * Note that the rule for domain literals is completely new in netstring-0.9.
+ * It may cause incompatibilities with previous versions if '[' is not
+ * special.
+ *
+ * The general rule for special characters: Every special character c is
+ * returned as Special c, and any additional scanning functionality
+ * for this character is turned off.
+ *
+ * If recognized, quoted strings are returned as QString s, where
+ * s is the string without the embracing quotes, and with already
+ * decoded quoted pairs.
+ *
+ * Control characters c are returned as Control c.
+ *
+ * If recognized, comments may either be returned as spaces (in the case
+ * you are not interested in the contents of comments), or as Comment tokens.
+ * The contents of comments are not further scanned; you must start a
+ * subscanner to analyze comments as structured values.
+ *
+ * If recognized, domain literals are returned as DomainLiteral s, where
+ * s is the literal without brackets, and with decoded quoted pairs.
+ *
+ * Atoms are returned as Atom s where s is a longest sequence of
+ * atomic characters (all characters which are neither special nor control
+ * characters nor delimiters for substructures). If the option
+ * Recognize_encoded_words is on, atoms which look like encoded words
+ * are returned as EncodedWord tokens. (Important note: Neither '?' nor
+ * '=' must be special in order to enable this functionality.)
+ *
+ * After the mime_scanner has been created, you can scan the tokens by
+ * invoking scan_token which returns one token at a time, or by invoking
+ * scan_token_list which returns all following tokens.
+ *
+ * There are two token types: s_token is the base type and is intended to
+ * be used for pattern matching. s_extended_token is a wrapper that
+ * additionally contains information where the token occurs.
+ *
+ * SIMPLE INTERFACE
+ *
+ * Instead of creating a mime_scanner and calling the scan functions,
+ * you may also invoke scan_structured_value. This function returns the
+ * list of tokens directly; however, it is restricted to s_token.
+ *
+ * EXAMPLES
+ *
+ * scan_structured_value "user@domain.com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] []
+ * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ]
+ * [ Return_comments ]
+ * = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
+ * Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com"
+ * [ '@'; '.'; ' ' ] []
+ * = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
+ * Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user (Do you know him?) @ domain . com"
+ * [ '@'; '.'; ' ' ] [ Return_comments ]
+ * = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
+ * Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
+ * = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+ * Special ' '; Special '.'; Special ' '; Atom "com" ]
+ *
+ * scan_structured_value "user(Do you know him?)@domain.com" ['@'; '.'; '(']
+ * []
+ * = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+ * Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ]
+ *
+ * scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] []
+ * = [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+ * Atom "com" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
+ * [ ] [ ]
+ * = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ]
+ *
+ * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
+ * [ ] [ Recognize_encoded_words ]
+ * = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ]
+ *
+ *)
+
+
+
+type s_token =
+ Atom of string
+ | EncodedWord of (string * string * string)
+ | QString of string
+ | Control of char
+ | Special of char
+ | DomainLiteral of string
+ | Comment
+ | End
+
+(* - Words are: Atom, EncodedWord, QString.
+ * - Atom s: The character sequence forming the atom is contained in s
+ * - EncodedWord(charset, encoding, encoded_string) means:
+ * * charset is the (uppercase) character set
+ * * encoding is either "Q" or "B"
+ * * encoded_string: contains the text of the word; the text is represented
+ * as octet string following the conventions for character set charset and
+ * then encoded either as "Q" or "B" string.
+ * - QString s: Here, s are the characters inside the double quotes after
+ * decoding any quoted pairs (backslash + character pairs)
+ * - Control c: The control character c
+ * - Special c: The special character c
+ * - DomainLiteral s: s contains the characters inside the brackets after
+ * decoding any quoted pairs
+ * - Comment: if the option Return_comments is specified, this token
+ * represents the whole comment.
+ * - End: Is returned after the last token
+ *)
+
+
+type s_option =
+ No_backslash_escaping
+ (* Do not handle backslashes in quoted string and comments as escape
+ * characters; backslashes are handled as normal characters.
+ * For example: "C:\dir\file" will be returned as
+ * QString "C:\dir\file", and not as QString "C:dirfile".
+ * - This is a common error in many MIME implementations.
+ *)
+ | Return_comments
+ (* Comments are returned as token Comment (unless '(' is included
+ * in the list of special characters, in which case comments are
+ * not recognized at all).
+ * You may get the exact location of the comment by applying
+ * get_pos and get_length to the extended token.
+ *)
+ | Recognize_encoded_words
+ (* Enables that encoded words are recognized and returned as
+ * EncodedWord(charset,encoding,content) instead of Atom.
+ *)
+
+type s_extended_token
+ (* An opaque type containing s_token plus:
+ * - where the token occurs
+ * - RFC-2047 access functions
+ *)
+
+val get_token : s_extended_token -> s_token
+ (* Return the s_token within the s_extended_token *)
+
+val get_decoded_word : s_extended_token -> string
+val get_charset : s_extended_token -> string
+ (* Return the decoded word (the contents of the word after decoding the
+ * "Q" or "B" representation), and the character set of the decoded word
+ * (uppercase).
+ * These functions not only work for EncodedWord:
+ * - Atom: Returns the atom without decoding it
+ * - QString: Returns the characters inside the double quotes, and
+ * decodes any quoted pairs (backslash + character)
+ * - Control: Returns the one-character string
+ * - Special: Returns the one-character string
+ * - DomainLiteral: Returns the characters inside the brackets, and
+ * decodes any quoted pairs
+ * - Comment: Returns ""
+ * The character set is "US-ASCII" for these tokens.
+ *)
+
+val get_pos : s_extended_token -> int
+ (* Return the byte position where the token starts in the string
+ * (the first byte has position 0)
+ *)
+
+val get_line : s_extended_token -> int
+ (* Return the line number where the token starts (numbering begins
+ * usually with 1)
+ *)
+
+val get_column : s_extended_token -> int
+ (* Return the column of the line where the token starts (first column
+ * is number 0)
+ *)
+
+val get_length : s_extended_token -> int
+ (* Return the length of the token in bytes *)
+
+val separates_adjacent_encoded_words : s_extended_token -> bool
+ (* True iff the current token is white space (Special ' ', Special '\t',
+ * Special '\r' or Special '\n') and the last non-white space token
+ * was EncodedWord and the next non-white space token will be
+ * EncodedWord.
+ * Such spaces do not count and must be ignored by any application.
+ *)
+
+
+type mime_scanner
+ (* An opaque type: a scanner for one string. Values are created by
+ * create_mime_scanner, and the tokens are read from them with
+ * scan_token or scan_token_list.
+ *)
+
+val create_mime_scanner :
+ specials:char list ->
+ scan_options:s_option list ->
+ ?pos:int ->
+ ?line:int ->
+ ?column:int ->
+ string ->
+ mime_scanner
+ (* Creates a new mime_scanner scanning the passed string.
+ * specials: The list of characters recognized as special characters.
+ * scan_options: The list of global options modifying the behaviour
+ * of the scanner
+ * pos: The position of the byte where the scanner starts in the
+ * passed string. Defaults to 0.
+ * line: The line number of this byte. Defaults to 1.
+ * column: The column number of this byte. Defaults to 0.
+ *
+ * The optional parameters pos, line, column are intentionally after
+ * scan_options and before the string argument, so you can specify
+ * scanners by partially applying arguments to create_mime_scanner
+ * which are not yet connected with a particular string:
+ * let my_scanner_spec = create_mime_scanner my_specials my_options in
+ * ...
+ * let my_scanner = my_scanner_spec my_string in
+ * ...
+ *)
+
+val get_pos_of_scanner : mime_scanner -> int
+val get_line_of_scanner : mime_scanner -> int
+val get_column_of_scanner : mime_scanner -> int
+ (* Return the current position, line, and column of a mime_scanner.
+ * The primary purpose of these functions is to simplify switching
+ * from one mime_scanner to another within a string:
+ *
+ * let scanner1 = create_mime_scanner ... s in
+ * ... now scanning some tokens from s using scanner1 ...
+ * let scanner2 = create_mime_scanner ...
+ * ~pos:(get_pos_of_scanner scanner1)
+ * ~line:(get_line_of_scanner scanner1)
+ * ~column:(get_column_of_scanner scanner1)
+ * s in
+ * ... scanning more tokens from s using scanner2 ...
+ *
+ * RESTRICTION: These functions are not available if the option
+ * Recognize_encoded_words is on. The reason is that this option
+ * enables look-ahead scanning; please use the location of the last
+ * scanned token instead.
+ * It is currently not clear whether a better implementation is needed
+ * (costs a bit more time).
+ *
+ * Note: To improve the performance of switching, it is recommended to
+ * create scanner specs in advance (see the example my_scanner_spec
+ * above).
+ *)
+
+val scan_token : mime_scanner -> (s_extended_token * s_token)
+ (* Returns the next token, or End if there is no more token. *)
+
+val scan_token_list : mime_scanner -> (s_extended_token * s_token) list
+ (* Returns all following tokens as a list (excluding End) *)
+
+val scan_structured_value : string -> char list -> s_option list -> s_token list
+ (* This function is included for backwards compatibility, and for all
+ * cases not requiring extended tokens.
+ *
+ * It scans the passed string according to the list of special characters
+ * and the list of options, and returns the list of all tokens.
+ *)
+
+val specials_rfc822 : char list
+val specials_rfc2045 : char list
+ (* The sets of special characters defined by the RFCs 822 and 2045.
+ *
+ * CHANGE in netstring-0.9: '[' and ']' are no longer special because
+ * there is now support for domain literals.
+ * '?' and '=' are not special in the rfc2045 version because there is
+ * already support for encoded words.
+ *)
+
+
+(**********************************************************************)
+
+(* Widely used scanners: *)
+
+
+val scan_encoded_text_value : string -> s_extended_token list
+ (* Scans a "text" value. The returned token list contains only
+ * Special, Atom and EncodedWord tokens.
+ * Spaces, TABs, CRs, LFs are returned unless
+ * they occur between adjacent encoded words in which case
+ * they are ignored.
+ *)
+
+
+val scan_value_with_parameters : string -> s_option list ->
+ (string * (string * string) list)
+ (* let name, params = scan_value_with_parameters s options:
+ * Scans phrases like
+ * name ; p1=v1 ; p2=v2 ; ...
+ * The scan is done with the set of special characters [';'; '='].
+ *)
+
+val scan_mime_type : string -> s_option list ->
+ (string * (string * string) list)
+ (* let name, params = scan_mime_type s options:
+ * Scans MIME types like
+ * text/plain; charset=iso-8859-1
+ * The name of the type and the names of the parameters are converted
+ * to lower case.
+ *)
+
+
+(**********************************************************************)
+
+(* Scanners for MIME bodies *)
+
+val scan_multipart_body : string -> start_pos:int -> end_pos:int ->
+ boundary:string ->
+ ((string * string) list * string) list
+ (* let [params1, value1; params2, value2; ...]
+ * = scan_multipart_body s i0 i1 b
+ *
+ * Scans the string s that is the body of a multipart message.
+ * The multipart message begins at position i0 in s and i1 the position
+ * of the character following the message. In b the boundary string
+ * must be passed (this is the "boundary" parameter of the multipart
+ * MIME type, e.g. multipart/mixed;boundary="some string" ).
+ * The return value is the list of the parts, where each part
+ * is returned as pair (params, value). The left component params
+ * is the list of name/value pairs of the header of the part. The
+ * right component is the RAW content of the part, i.e. if the part
+ * is encoded ("content-transfer-encoding"), the content is returned
+ * in the encoded representation. The caller must decode the content
+ * itself.
+ * The material before the first boundary and after the last
+ * boundary is not returned.
+ *
+ * MULTIPART MESSAGES
+ *
+ * The MIME standard defines a way to group several message parts to
+ * a larger message (for E-Mails this technique is known as "attaching"
+ * files to messages); these are the so-called multipart messages.
+ * Such messages are recognized by the major type string "multipart",
+ * e.g. multipart/mixed or multipart/form-data. Multipart types MUST
+ * have a boundary parameter because boundaries are essential for the
+ * representation.
+ * Multipart messages have a format like
+ *
+ * ...Header...
+ * Content-type: multipart/xyz; boundary="abc"
+ * ...Header...
+ *
+ * Body begins here ("prologue")
+ * --abc
+ * ...Header part 1...
+ *
+ * ...Body part 1...
+ * --abc
+ * ...Header part 2...
+ *
+ *
+ * ...Body part 2
+ * --abc
+ * ...
+ * --abc--
+ * Epilogue
+ *
+ * The parts are separated by boundary lines which begin with "--" and
+ * the string passed as boundary parameter. (Note that there may follow
+ * arbitrary text on boundary lines after "--abc".) The boundary is
+ * chosen such that it does not occur as prefix of any line of the
+ * inner parts of the message.
+ * The parts are again MIME messages, with header and body. Note
+ * that it is explicitly allowed that the parts are even multipart
+ * messages.
+ * The texts before the first boundary and after the last boundary
+ * are ignored.
+ * Note that multipart messages as a whole MUST NOT be encoded.
+ * Only the PARTS of the messages may be encoded (if they are not
+ * multipart messages themselves).
+ *
+ * Please read RFC 2046 if you want to know the gory details of this
+ * brain-dead format.
+ *)
+
+val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int ->
+ boundary:string ->
+ ((string * string) list * string) list
+ (* Same as scan_multipart_body, but decodes the bodies of the parts
+ * if they are encoded using the methods "base64" or "quoted printable".
+ * Fails, if an unknown encoding is used.
+ *)
+
+val scan_multipart_body_from_netstream
+ : Netstream.t ->
+ boundary:string ->
+ create:((string * string) list -> 'a) ->
+ add:('a -> Netstream.t -> int -> int -> unit) ->
+ stop:('a -> unit) ->
+ unit
+ (* scan_multipart_body_from_netstream s b create add stop:
+ *
+ * Reads the MIME message from the netstream s block by block. The
+ * parts are delimited by the boundary b.
+ *
+ * Once a new part is detected and begins, the function 'create' is
+ * called with the MIME header as argument. The result p of this function
+ * may be of any type.
+ *
+ * For every chunk of the part that is being read, the function 'add'
+ * is invoked: add p s k n.
+ * Here, p is the value returned by the 'create' invocation for the
+ * current part. s is the netstream. The current window of s contains
+ * the read chunk completely; the chunk begins at position k of the
+ * window (relative to the beginning of the window) and has a length
+ * of n bytes.
+ *
+ * When the part has been fully read, the function 'stop' is
+ * called with p as argument.
+ *
+ * That means, for every part the following is executed:
+ * - let p = create h
+ * - add p s k1 n1
+ * - add p s k2 n2
+ * - ...
+ * - add p s kN nN
+ * - stop p
+ *
+ * IMPORTANT PRECONDITION:
+ * - The block size of the netstream s must be at least
+ * String.length b + 3
+ *
+ * EXCEPTIONS:
+ * - Exceptions can happen because of ill-formed input, and within
+ * the callbacks of the functions 'create', 'add', 'stop'.
+ * - If the exception happens while part p is being read, and the
+ * 'create' function has already been called (successfully), the
+ * 'stop' function is also called (you have the chance to close files).
+ *)
+
+
+(* THREAD-SAFETY:
+ * The functions are thread-safe as long as the threads do not share
+ * values.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/13 00:04:36 gerd
+ * Encoded_word -> EncodedWord
+ * Bugfixes.
+ *
+ * Revision 1.7 2000/08/07 00:25:00 gerd
+ * Major update of the interface for structured field lexing.
+ *
+ * Revision 1.6 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.5 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.4 2000/05/16 22:29:12 gerd
+ * New "option" arguments specifying the level of MIME
+ * compatibility.
+ *
+ * Revision 1.3 2000/04/15 13:09:01 gerd
+ * Implemented uploads to temporary files.
+ *
+ * Revision 1.2 2000/03/02 01:15:30 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/02/25 15:21:12 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+type t =
+ { mutable buffer : string;
+ (* The storage. It may be longer than 'length'; it is replaced by
+ * a larger string when appended data does not fit.
+ *)
+ mutable length : int;
+ (* The number of bytes at the beginning of 'buffer' that are in
+ * use. New data is appended at this position.
+ *)
+ }
+
+(* To help the garbage collector:
+ * The 'buffer' has a minimum length of 31 bytes. This minimum can still
+ * be stored in the minor heap.
+ * The 'buffer' has a length which is always near a multiple of two. This
+ * limits the number of different bucket sizes, and simplifies reallocation
+ * of freed memory.
+ *)
+
+(* Optimal string length:
+ * Every string takes: 1 word for the header, enough words for the
+ * contents + 1 Null byte (for C compatibility).
+ * If the buffer grows, it is best to use a new string length such
+ * that the number of words is exactly twice as large as for the previous
+ * string.
+ * n: length of the previous string in bytes
+ * w: storage size of the previous string in words
+ * n': length of the new string in bytes
+ * w' = 2*w: storage size of the new string in words
+ *
+ * w = (n+1) / word_length + 1
+ * [it is assumed that (n+1) is always a multiple of word_length]
+ *
+ * n' = (2*w - 1) * word_length - 1
+ *
+ * n' = [2 * ( [n+1] / word_length + 1) - 1] * word_length - 1
+ * = ...
+ * = (2*n + 2) + word_length - 1
+ * = 2 * n + word_length + 1
+ *
+ * n'+1 is again a multiple of word_length:
+ * n'+1 = 2*n + 2 + word_length
+ * = 2*(n+1) + word_length
+ * = a multiple of word_length because n+1 is a multiple of word_length
+ *)
+
+let word_length =
+  (* Size of a machine word in bytes; used when computing new buffer sizes. *)
+  let bits_per_byte = 8 in
+  Sys.word_size / bits_per_byte
+
+let create n =
+  (* Allocates at least 31 bytes, whatever n is; the new buffer is empty. *)
+  let initial_size = if n < 31 then 31 else n in
+  { length = 0; buffer = String.create initial_size }
+
+let contents b =
+  (* Copies out exactly the bytes in use; the spare capacity is left out. *)
+  let used = b.length in
+  String.sub b.buffer 0 used
+
+let sub b ~pos:k ~len:n =
+  (* Returns a copy of the n bytes at position k. The range must end within
+   * the logical contents, otherwise Invalid_argument "Netbuffer.sub" is
+   * raised.
+   *)
+  let stop = k + n in
+  if stop <= b.length then
+    String.sub b.buffer k n
+  else
+    invalid_arg "Netbuffer.sub"
+
+let unsafe_buffer { buffer = raw } =
+  (* Hands out the backing string itself, not a copy of it. *)
+  raw
+
+let length { length = used } =
+  (* The logical length, i.e. the number of bytes in use. *)
+  used
+
+let add_string b s =
+  (* Appends a copy of s. If the spare capacity does not suffice, the
+   * backing string is replaced by a larger one; candidate sizes are
+   * obtained by repeatedly applying  size -> 2*size + word_length + 1.
+   *)
+  let added = String.length s in
+  let needed = b.length + added in
+  let capacity = String.length b.buffer in
+  if needed > capacity then begin
+    let size = ref capacity in
+    while !size < needed do
+      size := 2 * !size + word_length + 1
+    done;
+    let grown = String.create !size in
+    String.blit b.buffer 0 grown 0 b.length;
+    b.buffer <- grown
+  end;
+  String.blit s 0 b.buffer b.length added;
+  b.length <- needed
+
+let add_sub_string b s ~pos:k ~len:l =
+  (* Appends the l bytes of s that start at position k, without building
+   * the substring first. The backing string grows like in add_string:
+   * candidate sizes follow  size -> 2*size + word_length + 1.
+   *)
+  let needed = b.length + l in
+  let capacity = String.length b.buffer in
+  if needed > capacity then begin
+    let size = ref capacity in
+    while !size < needed do
+      size := 2 * !size + word_length + 1
+    done;
+    let grown = String.create !size in
+    String.blit b.buffer 0 grown 0 b.length;
+    b.buffer <- grown
+  end;
+  String.blit s k b.buffer b.length l;
+  b.length <- needed
+
+let delete b ~pos:k ~len:l =
+  (* Deletes the l bytes at position k of the logical contents in place:
+   * the bytes behind the deleted range move down by l positions. The
+   * allocated size does not change.
+   * Raises Invalid_argument "Netbuffer.delete" if the range k .. k+l-1
+   * does not lie within the logical contents. Without this check such a
+   * call could leave a negative or otherwise wrong logical length behind.
+   *)
+  if k < 0 || l < 0 || k + l > b.length then
+    invalid_arg "Netbuffer.delete";
+  (* Only the bytes in use have to be moved, not the spare capacity: *)
+  let tail = b.length - (k + l) in
+  if l > 0 && tail > 0 then
+    String.blit b.buffer (k+l) b.buffer k tail;
+  b.length <- b.length - l
+
+let try_shrinking b =
+  (* If less than half of the allocated bytes are in use, the contents
+   * are moved to a smaller backing string. Its size is found by starting
+   * at 31 and applying  size -> 2*size + word_length + 1  until the
+   * contents fit.
+   *)
+  let used = b.length in
+  if used < String.length b.buffer / 2 then begin
+    let size = ref 31 in
+    while !size < used do
+      size := 2 * !size + word_length + 1
+    done;
+    let smaller = String.create !size in
+    String.blit b.buffer 0 smaller 0 used;
+    b.buffer <- smaller
+  end
+
+let clear b =
+  (* Discards all contents. Nothing has to be moved for that, so the
+   * logical length is simply reset to zero. The allocated size does not
+   * change.
+   *)
+  b.length <- 0
+
+let index_from b k c =
+  (* Position of the first c at or after position k. The search runs over
+   * the backing string, so a hit at or behind the logical end counts as
+   * "not found".
+   *)
+  if k > b.length then invalid_arg "Netbuffer.index_from";
+  let hit = String.index_from b.buffer k c in
+  if hit < b.length then hit else raise Not_found
+
+let print_buffer b =
+  (* Printer for the toploop: shows "used/allocated" byte counts. *)
+  let used = b.length
+  and allocated = String.length b.buffer in
+  Format.printf "<NETBUFFER: %d/%d>" used allocated
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A Netbuffer.t is a buffer that can grow and shrink dynamically. *)
+
+type t
+
+val create : int -> t
+ (* Creates a netbuffer which allocates initially this number of bytes
+ * (but at least 31 bytes). The logical length is zero.
+ *)
+
+val contents : t -> string
+ (* Returns the contents of the buffer as fresh string. *)
+
+val sub : t -> pos:int -> len:int -> string
+ (* sub nb k n: returns the n characters starting at position k from
+ * netbuffer nb as fresh string.
+ * Raises Invalid_argument if k+n exceeds the logical length.
+ *)
+
+val length : t -> int
+ (* Returns the logical length of the buffer *)
+
+val add_string : t -> string -> unit
+ (* add_string nb s: Adds a copy of the string s to the logical end of
+ * the netbuffer nb. If necessary, the nb grows.
+ *)
+
+val add_sub_string : t -> string -> pos:int -> len:int -> unit
+ (* add_sub_string nb s k n: Adds the substring of s starting at position
+ * k with length n to the logical end of the netbuffer nb. If necessary,
+ * the nb grows.
+ * This is semantically the same as
+ * add_string nb (String.sub s k n), but the extra copy is avoided.
+ *)
+
+val delete : t -> pos:int -> len:int -> unit
+ (* delete nb k n: Deletes the n bytes at position k of netbuffer nb
+ * in-place.
+ * The netbuffer does not shrink!
+ *)
+
+val clear : t -> unit
+ (* Deletes all contents from the buffer. As 'delete', the netbuffer does
+ * not shrink.
+ *)
+
+val try_shrinking : t -> unit
+ (* try_shrinking nb: If the length of the buffer is less than half of
+ * the allocated space, the netbuffer is reallocated in order to save
+ * memory.
+ *)
+
+val index_from : t -> int -> char -> int
+ (* index_from nb k c: Searches the character c in the netbuffer beginning
+ * at position k. If found, the position of the left-most occurrence is
+ * returned. Otherwise, Not_found is raised.
+ * Raises Invalid_argument if k is greater than the logical length.
+ *)
+
+val unsafe_buffer : t -> string
+ (* WARNING! This is a low-level function!
+ * Returns the current string that internally holds the buffer.
+ * The byte positions 0 to length - 1 actually store the contents of
+ * the buffer. You can directly read and modify the buffer. Note that
+ * there is no protection if you read or write positions beyond the
+ * length of the buffer.
+ *)
+
+val print_buffer : t -> unit
+ (* For the toploop: prints the logical and the allocated length. *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code (* raised by the read_* functions below when the input is not valid in the encoding being read *)
+
+
+type encoding =
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_java (* The variant of UTF-8 used by Java *)
+ | `Enc_utf16 (* UTF-16 with unspecified endianness (restricted usage) *)
+ | `Enc_utf16_le (* UTF-16 little endian *)
+ | `Enc_utf16_be (* UTF-16 big endian *)
+ | `Enc_usascii (* US-ASCII (only 7 bit) *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ | `Enc_iso88592 (* ISO-8859-2 *)
+ | `Enc_iso88593 (* ISO-8859-3 *)
+ | `Enc_iso88594 (* ISO-8859-4 *)
+ | `Enc_iso88595 (* ISO-8859-5 *)
+ | `Enc_iso88596 (* ISO-8859-6 *)
+ | `Enc_iso88597 (* ISO-8859-7 *)
+ | `Enc_iso88598 (* ISO-8859-8 *)
+ | `Enc_iso88599 (* ISO-8859-9 *)
+ | `Enc_iso885910 (* ISO-8859-10 *)
+ | `Enc_iso885913 (* ISO-8859-13 *)
+ | `Enc_iso885914 (* ISO-8859-14 *)
+ | `Enc_iso885915 (* ISO-8859-15 *)
+ | `Enc_koi8r (* KOI8-R *)
+ | `Enc_jis0201 (* JIS-0201 *)
+ (* Microsoft: *)
+ | `Enc_windows1250 (* WINDOWS-1250 *)
+ | `Enc_windows1251 (* WINDOWS-1251 *)
+ | `Enc_windows1252 (* WINDOWS-1252 *)
+ | `Enc_windows1253 (* WINDOWS-1253 *)
+ | `Enc_windows1254 (* WINDOWS-1254 *)
+ | `Enc_windows1255 (* WINDOWS-1255 *)
+ | `Enc_windows1256 (* WINDOWS-1256 *)
+ | `Enc_windows1257 (* WINDOWS-1257 *)
+ | `Enc_windows1258 (* WINDOWS-1258 *)
+ (* IBM, ASCII-based: *)
+ | `Enc_cp437
+ | `Enc_cp737
+ | `Enc_cp775
+ | `Enc_cp850
+ | `Enc_cp852
+ | `Enc_cp855
+ | `Enc_cp856
+ | `Enc_cp857
+ | `Enc_cp860
+ | `Enc_cp861
+ | `Enc_cp862
+ | `Enc_cp863
+ | `Enc_cp864
+ | `Enc_cp865
+ | `Enc_cp866
+ | `Enc_cp869
+ | `Enc_cp874
+ | `Enc_cp1006
+ (* IBM, EBCDIC-based: *)
+ | `Enc_cp037
+ | `Enc_cp424
+ | `Enc_cp500
+ | `Enc_cp875
+ | `Enc_cp1026
+ (* Adobe: *)
+ | `Enc_adobe_standard_encoding
+ | `Enc_adobe_symbol_encoding
+ | `Enc_adobe_zapf_dingbats_encoding
+ (* Apple: *)
+ | `Enc_macroman (* known under the name MACINTOSH *)
+
+ ]
+;;
+
+
+let norm_enc_name e =
+  (* Normalizes the name of an encoding: the separators '-', '_' and '.'
+   * are dropped, and the remaining characters are converted to uppercase.
+   *)
+  let n = String.length e in
+  let kept = String.create n in
+  let fill = ref 0 in
+  for i = 0 to n - 1 do
+    let c = e.[i] in
+    if c <> '-' && c <> '_' && c <> '.' then begin
+      kept.[!fill] <- c;
+      incr fill
+    end
+  done;
+  String.uppercase (String.sub kept 0 !fill)
+;;
+
+
+let encoding_of_string e = (* Maps the name of a character set to the
+ * corresponding encoding tag. The name is first normalized with
+ * norm_enc_name, so the case of the letters and the characters '-',
+ * '_', '.' do not matter. Raises Failure for unknown names.
+ *)
+ match norm_enc_name e with
+ ("UTF16"|"UCS2"|"ISO10646UCS2") -> `Enc_utf16
+ | "UTF16BE" -> `Enc_utf16_be
+ | "UTF16LE" -> `Enc_utf16_le
+ | "UTF8" -> `Enc_utf8
+ | ("UTF8JAVA"|"JAVA") -> `Enc_java
+ | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii
+ | ("ISO88591"|"LATIN1"|"IBM819"|"CP819") -> `Enc_iso88591
+ | ("ISO88592"|"LATIN2") -> `Enc_iso88592
+ | ("ISO88593"|"LATIN3") -> `Enc_iso88593
+ | ("ISO88594"|"LATIN4") -> `Enc_iso88594
+ | ("ISO88595"|"CYRILLIC") -> `Enc_iso88595
+ | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708") -> `Enc_iso88596
+ | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597
+ | ("ISO88598"|"HEBREW") -> `Enc_iso88598
+ | ("ISO88599"|"LATIN5") -> `Enc_iso88599
+ | ("ISO885910"|"LATIN6") -> `Enc_iso885910
+ | "ISO885913" -> `Enc_iso885913
+ | "ISO885914" -> `Enc_iso885914
+ | "ISO885915" -> `Enc_iso885915
+ | "KOI8R" -> `Enc_koi8r
+ | ("JIS0201"|"JISX0201"|"X0201") -> `Enc_jis0201
+ (* Microsoft: *)
+ | "WINDOWS1250" -> `Enc_windows1250
+ | "WINDOWS1251" -> `Enc_windows1251
+ | "WINDOWS1252" -> `Enc_windows1252
+ | "WINDOWS1253" -> `Enc_windows1253
+ | "WINDOWS1254" -> `Enc_windows1254
+ | "WINDOWS1255" -> `Enc_windows1255
+ | "WINDOWS1256" -> `Enc_windows1256
+ | "WINDOWS1257" -> `Enc_windows1257
+ | "WINDOWS1258" -> `Enc_windows1258
+ (* IBM, ASCII-based: *)
+ | ("CP437"|"IBM437") -> `Enc_cp437
+ | ("CP737"|"IBM737") -> `Enc_cp737
+ | ("CP775"|"IBM775") -> `Enc_cp775
+ | ("CP850"|"IBM850") -> `Enc_cp850
+ | ("CP852"|"IBM852") -> `Enc_cp852
+ | ("CP855"|"IBM855") -> `Enc_cp855
+ | ("CP856"|"IBM856") -> `Enc_cp856
+ | ("CP857"|"IBM857") -> `Enc_cp857
+ | ("CP860"|"IBM860") -> `Enc_cp860
+ | ("CP861"|"IBM861") -> `Enc_cp861
+ | ("CP862"|"IBM862") -> `Enc_cp862
+ | ("CP863"|"IBM863") -> `Enc_cp863
+ | ("CP864"|"IBM864") -> `Enc_cp864
+ | ("CP865"|"IBM865") -> `Enc_cp865
+ | ("CP866"|"IBM866") -> `Enc_cp866
+ | ("CP869"|"IBM869") -> `Enc_cp869
+ | ("CP874"|"IBM874") -> `Enc_cp874
+ | ("CP1006"|"IBM1006") -> `Enc_cp1006
+ (* IBM, EBCDIC-based: *)
+ | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"|
+ "EBCDICCPNL") -> `Enc_cp037
+ | ("CP424"|"IBM424"|"EBCDICCPHE") -> `Enc_cp424
+ | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH") -> `Enc_cp500
+ | ("CP875"|"IBM875") -> `Enc_cp875
+ | ("CP1026"|"IBM1026") -> `Enc_cp1026
+ (* Adobe: *)
+ | "ADOBESTANDARDENCODING" -> `Enc_adobe_standard_encoding
+ | "ADOBESYMBOLENCODING" -> `Enc_adobe_symbol_encoding
+ | "ADOBEZAPFDINGBATSENCODING" -> `Enc_adobe_zapf_dingbats_encoding
+ (* Apple: *)
+ | "MACINTOSH" -> `Enc_macroman
+ (* Every other name is rejected: *)
+ | _ ->
+ failwith "Netconversion.encoding_of_string: unknown encoding"
+;;
+
+
+let string_of_encoding (e : encoding) =
+  (* Returns a name for the encoding e. If there is a "preferred MIME
+   * name", this name is returned (see IANA). Every returned name is
+   * accepted by encoding_of_string.
+   * The big and little endian forms of UTF-16 are registered as
+   * "UTF-16BE" and "UTF-16LE", i.e. without a hyphen before the byte
+   * order suffix.
+   *)
+  match e with
+    `Enc_utf16 -> "UTF-16"
+  | `Enc_utf16_be -> "UTF-16BE"
+  | `Enc_utf16_le -> "UTF-16LE"
+  | `Enc_utf8 -> "UTF-8"
+  | `Enc_java -> "UTF-8-JAVA"
+  | `Enc_usascii -> "US-ASCII"
+  | `Enc_iso88591 -> "ISO-8859-1"
+  | `Enc_iso88592 -> "ISO-8859-2"
+  | `Enc_iso88593 -> "ISO-8859-3"
+  | `Enc_iso88594 -> "ISO-8859-4"
+  | `Enc_iso88595 -> "ISO-8859-5"
+  | `Enc_iso88596 -> "ISO-8859-6"
+  | `Enc_iso88597 -> "ISO-8859-7"
+  | `Enc_iso88598 -> "ISO-8859-8"
+  | `Enc_iso88599 -> "ISO-8859-9"
+  | `Enc_iso885910 -> "ISO-8859-10"
+  | `Enc_iso885913 -> "ISO-8859-13"
+  | `Enc_iso885914 -> "ISO-8859-14"
+  | `Enc_iso885915 -> "ISO-8859-15"
+  | `Enc_koi8r -> "KOI8-R"
+  | `Enc_jis0201 -> "JIS_X0201"
+  | `Enc_windows1250 -> "WINDOWS-1250"
+  | `Enc_windows1251 -> "WINDOWS-1251"
+  | `Enc_windows1252 -> "WINDOWS-1252"
+  | `Enc_windows1253 -> "WINDOWS-1253"
+  | `Enc_windows1254 -> "WINDOWS-1254"
+  | `Enc_windows1255 -> "WINDOWS-1255"
+  | `Enc_windows1256 -> "WINDOWS-1256"
+  | `Enc_windows1257 -> "WINDOWS-1257"
+  | `Enc_windows1258 -> "WINDOWS-1258"
+  | `Enc_cp437 -> "CP437"
+  | `Enc_cp737 -> "CP737"
+  | `Enc_cp775 -> "CP775"
+  | `Enc_cp850 -> "CP850"
+  | `Enc_cp852 -> "CP852"
+  | `Enc_cp855 -> "CP855"
+  | `Enc_cp856 -> "CP856"
+  | `Enc_cp857 -> "CP857"
+  | `Enc_cp860 -> "CP860"
+  | `Enc_cp861 -> "CP861"
+  | `Enc_cp862 -> "CP862"
+  | `Enc_cp863 -> "CP863"
+  | `Enc_cp864 -> "CP864"
+  | `Enc_cp865 -> "CP865"
+  | `Enc_cp866 -> "CP866"
+  | `Enc_cp869 -> "CP869"
+  | `Enc_cp874 -> "CP874"
+  | `Enc_cp1006 -> "CP1006"
+  | `Enc_cp037 -> "CP037"
+  | `Enc_cp424 -> "CP424"
+  | `Enc_cp500 -> "CP500"
+  | `Enc_cp875 -> "CP875"
+  | `Enc_cp1026 -> "CP1026"
+  | `Enc_adobe_standard_encoding -> "ADOBE-STANDARD-ENCODING"
+  | `Enc_adobe_symbol_encoding -> "ADOBE-SYMBOL-ENCODING"
+  | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING"
+  | `Enc_macroman -> "MACINTOSH"
+;;
+
+
+let read_iso88591 write s_in p_in l_in =
+  (* Reads the l_in bytes of s_in starting at p_in; every byte is passed
+   * as code point to 'write p k_out c_out'. 'write' returns the number of
+   * bytes it has output, or a negative number to stop the conversion.
+   * Result: (number of bytes read, number of bytes written, encoding).
+   *)
+  let k_in = ref 0 in
+  let k_out = ref 0 in
+  let c_out = ref 0 in
+  let go_on = ref true in
+  while !go_on && !k_in < l_in do
+    let n = write (Char.code s_in.[p_in + !k_in]) !k_out !c_out in
+    if n < 0 then
+      go_on := false
+    else begin
+      incr k_in;
+      k_out := !k_out + n;
+      incr c_out
+    end
+  done;
+  (!k_in, !k_out, `Enc_iso88591)
+;;
+
+
+let read_usascii write s_in p_in l_in =
+  (* Like read_iso88591, but bytes >= 0x80 are rejected with
+   * Malformed_code. Result: (bytes read, bytes written, encoding).
+   *)
+  let k_in = ref 0 in
+  let k_out = ref 0 in
+  let c_out = ref 0 in
+  let go_on = ref true in
+  while !go_on && !k_in < l_in do
+    let p = Char.code s_in.[p_in + !k_in] in
+    if p >= 0x80 then raise Malformed_code;
+    let n = write p !k_out !c_out in
+    if n < 0 then
+      go_on := false
+    else begin
+      incr k_in;
+      k_out := !k_out + n;
+      incr c_out
+    end
+  done;
+  (!k_in, !k_out, `Enc_usascii)
+;;
+
+
+let read_8bit m_to_unicode enc write s_in p_in l_in =
+  (* Reader for single-byte character sets: every byte is translated with
+   * the table m_to_unicode (indexed by the byte); a negative table entry
+   * means that the byte is not assigned, and Malformed_code is raised.
+   * Result: (bytes read, bytes written, enc).
+   *)
+  let k_in = ref 0 in
+  let k_out = ref 0 in
+  let c_out = ref 0 in
+  let go_on = ref true in
+  while !go_on && !k_in < l_in do
+    let p_local = Char.code s_in.[p_in + !k_in] in
+    let p_uni = Array.unsafe_get m_to_unicode p_local in
+    if p_uni < 0 then raise Malformed_code;
+    let n = write p_uni !k_out !c_out in
+    if n < 0 then
+      go_on := false
+    else begin
+      incr k_in;
+      k_out := !k_out + n;
+      incr c_out
+    end
+  done;
+  (!k_in, !k_out, enc)
+;;
+
+
+let read_utf8 is_java write s_in p_in l_in =
+  (* Reads UTF-8 (is_java = false) or the Java variant of UTF-8
+   * (is_java = true) from the l_in bytes of s_in starting at p_in, and
+   * passes every code point to 'write p k_out c_out'. 'write' returns the
+   * number of bytes it has output, or a negative number to stop.
+   * Result: (bytes read, bytes written, encoding), where the encoding is
+   * `Enc_java or `Enc_utf8 depending on is_java, such that a continued
+   * conversion uses the same variant again.
+   * The conversion also stops in front of a multi-byte character that is
+   * not completely contained in the input.
+   * Raises Malformed_code for ill-formed input: bad lead or continuation
+   * bytes, overlong forms, surrogates, 0xfffe, 0xffff, code points beyond
+   * 0x10ffff. In the Java variant, the byte 0 is illegal, and the code
+   * point 0 is represented by the two bytes 0xc0 0x80.
+   *)
+  let enc = if is_java then `Enc_java else `Enc_utf8 in
+  let cont k =
+    (* The 6 payload bits of the continuation byte at offset k *)
+    let n = Char.code s_in.[p_in + k] in
+    if n < 0x80 || n > 0xbf then raise Malformed_code;
+    n land 0x3f
+  in
+  let rec scan k_in k_out c_out =
+    if k_in < l_in then begin
+      let n_out, n_in =
+        match s_in.[p_in + k_in] with
+          '\000' ->
+            if is_java then raise Malformed_code;
+            (write 0 k_out c_out, 1)
+        | ('\001'..'\127' as c) ->
+            (write (Char.code c) k_out c_out, 1)
+        | '\128'..'\191' ->
+            (* A continuation byte cannot begin a character *)
+            raise Malformed_code
+        | ('\192'..'\223' as c) ->
+            if k_in + 1 >= l_in then
+              (-1, 0)
+            else begin
+              let n1 = Char.code c in
+              let n2 = Char.code s_in.[p_in + k_in + 1] in
+              if is_java && n1 = 0xc0 && n2 = 0x80 then
+                (* The Java representation of the code point 0 *)
+                (write 0 k_out c_out, 2)
+              else begin
+                let p = ((n1 land 0x1f) lsl 6) lor (cont (k_in + 1)) in
+                if p < 128 then raise Malformed_code;
+                (write p k_out c_out, 2)
+              end
+            end
+        | ('\224'..'\239' as c) ->
+            if k_in + 2 >= l_in then
+              (-1, 0)
+            else begin
+              let n1 = Char.code c in
+              let n2 = cont (k_in + 1) in
+              let n3 = cont (k_in + 2) in
+              let p = ((n1 land 0x0f) lsl 12) lor (n2 lsl 6) lor n3 in
+              if p < 0x800 then raise Malformed_code;
+              if p >= 0xd800 && p < 0xe000 then
+                (* Surrogate pairs are not supported in UTF-8 *)
+                raise Malformed_code;
+              if p >= 0xfffe && p <= 0xffff then
+                raise Malformed_code;
+              (write p k_out c_out, 3)
+            end
+        | ('\240'..'\247' as c) ->
+            if k_in + 3 >= l_in then
+              (-1, 0)
+            else begin
+              let n1 = Char.code c in
+              let n2 = cont (k_in + 1) in
+              let n3 = cont (k_in + 2) in
+              let n4 = cont (k_in + 3) in
+              let p =
+                ((n1 land 0x07) lsl 18) lor (n2 lsl 12) lor (n3 lsl 6) lor n4
+              in
+              if p < 0x10000 then raise Malformed_code;
+              if p >= 0x110000 then
+                (* These code points are not supported. *)
+                raise Malformed_code;
+              (write p k_out c_out, 4)
+            end
+        | _ ->
+            (* Outside the valid range of XML characters *)
+            raise Malformed_code
+      in
+      (* n_out: number of written bytes; -1 means out buf is full
+       * n_in: number of read bytes; 0 means end of in buf reached
+       * n_in = 0 implies n_out = -1
+       *)
+      if n_out < 0 then
+        (k_in, k_out, enc)
+      else
+        scan (k_in + n_in) (k_out + n_out) (c_out + 1)
+    end
+    else
+      (k_in, k_out, enc)
+  in
+  scan 0 0 0
+;;
+
+
+let surrogate_offset =
+  (* Added to (high lsl 10) + low, this yields the code point that the
+   * surrogate pair (high, low) stands for.
+   *)
+  let first_high = 0xD800
+  and first_low = 0xDC00 in
+  0x10000 - (first_high lsl 10) - first_low;;
+
+let read_utf16_le k_in_0 write s_in p_in l_in =
+  (* Reads little endian UTF-16 from the l_in bytes of s_in starting at
+   * p_in; the first k_in_0 bytes of this range are skipped. Every code
+   * point is passed to 'write p k_out c_out', which returns the number of
+   * bytes it has output, or a negative number to stop the conversion.
+   * Result: (bytes read, bytes written, `Enc_utf16_le). The conversion
+   * also stops in front of an incomplete character or surrogate pair.
+   * Raises Malformed_code for malformed pairs and for the unit 0xfffe.
+   *)
+  let unit_at k =
+    (* The 16 bit unit at offset k, low byte first *)
+    (Char.code s_in.[p_in + k]) lor ((Char.code s_in.[p_in + k + 1]) lsl 8)
+  in
+  let rec scan k_in k_out c_out =
+    let stop () = (k_in, k_out, `Enc_utf16_le) in
+    let emit p width =
+      (* Output p; on success continue behind the 'width' bytes read *)
+      let n = write p k_out c_out in
+      if n < 0 then
+        stop ()
+      else
+        scan (k_in + width) (k_out + n) (c_out + 1)
+    in
+    if k_in + 1 >= l_in then
+      (* Incomplete character: *)
+      stop ()
+    else begin
+      let p = unit_at k_in in
+      if p < 0xd800 || p >= 0xe000 then begin
+        (* A regular code point, unless it is the big endian byte order
+         * mark, which is illegal here
+         *)
+        if p = 0xfffe then raise Malformed_code;
+        emit p 2
+      end
+      else if k_in + 3 >= l_in then
+        (* Incomplete pair: *)
+        stop ()
+      else if p > 0xdbff then
+        (* Malformed pair: *)
+        raise Malformed_code
+      else begin
+        let q = unit_at (k_in + 2) in
+        if q < 0xdc00 || q > 0xdfff then raise Malformed_code;
+        emit ((p lsl 10) + q + surrogate_offset) 4
+      end
+    end
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16_be k_in_0 write s_in p_in l_in =
+  (* Reads big endian UTF-16 from the l_in bytes of s_in starting at
+   * p_in; the first k_in_0 bytes of this range are skipped. Every code
+   * point is passed to 'write p k_out c_out', which returns the number of
+   * bytes it has output, or a negative number to stop the conversion.
+   * Result: (bytes read, bytes written, `Enc_utf16_be). The conversion
+   * also stops in front of an incomplete character or surrogate pair.
+   * Raises Malformed_code for malformed pairs and for the unit 0xfffe.
+   *)
+  let unit_at k =
+    (* The 16 bit unit at offset k, high byte first *)
+    (Char.code s_in.[p_in + k + 1]) lor ((Char.code s_in.[p_in + k]) lsl 8)
+  in
+  let rec scan k_in k_out c_out =
+    let stop () = (k_in, k_out, `Enc_utf16_be) in
+    let emit p width =
+      (* Output p; on success continue behind the 'width' bytes read *)
+      let n = write p k_out c_out in
+      if n < 0 then
+        stop ()
+      else
+        scan (k_in + width) (k_out + n) (c_out + 1)
+    in
+    if k_in + 1 >= l_in then
+      (* Incomplete character: *)
+      stop ()
+    else begin
+      let p = unit_at k_in in
+      if p < 0xd800 || p >= 0xe000 then begin
+        (* A regular code point, unless it is the little endian byte
+         * order mark, which is illegal here
+         *)
+        if p = 0xfffe then raise Malformed_code;
+        emit p 2
+      end
+      else if k_in + 3 >= l_in then
+        (* Incomplete pair: *)
+        stop ()
+      else if p > 0xdbff then
+        (* Malformed pair: *)
+        raise Malformed_code
+      else begin
+        let q = unit_at (k_in + 2) in
+        if q < 0xdc00 || q > 0xdfff then raise Malformed_code;
+        emit ((p lsl 10) + q + surrogate_offset) 4
+      end
+    end
+  in
+  scan k_in_0 0 0
+;;
+
+
+let read_utf16 write s_in p_in l_in =
+  (* Reads UTF-16 whose byte order is announced by a byte order mark at
+   * the beginning of the text, and continues with the matching reader
+   * behind the mark. With fewer than two bytes of input nothing is read.
+   * Raises Malformed_code if the byte order mark is missing.
+   *)
+  if l_in < 2 then
+    (0, 0, `Enc_utf16)
+  else
+    match (s_in.[p_in + 0], s_in.[p_in + 1]) with
+      ('\254', '\255') ->
+        (* 0xfeff as big endian *)
+        read_utf16_be 2 write s_in p_in l_in
+    | ('\255', '\254') ->
+        (* 0xfeff as little endian *)
+        read_utf16_le 2 write s_in p_in l_in
+    | _ ->
+        (* byte order mark missing *)
+        raise Malformed_code
+;;
+
+
+let write_iso88591 s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p as ISO-8859-1 at offset k_out of the output
+   * area (l_out bytes of s_out starting at p_out). Code points > 255 are
+   * replaced by the string 'w p'. c_out is the number of characters
+   * written so far.
+   * Returns the number of bytes stored, or -1 if the area is full, the
+   * substitution does not fit, or max_chars characters are reached.
+   *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1 (* End-of-buffer indicator *)
+  else if p <= 255 then begin
+    s_out.[p_out + k_out] <- Char.chr p;
+    1
+  end
+  else begin
+    let subst = w p in
+    let l_subst = String.length subst in
+    if k_out + l_subst > l_out then
+      (* Not enough space: Stop this round of recoding *)
+      -1
+    else begin
+      String.blit subst 0 s_out (k_out + p_out) l_subst;
+      l_subst
+    end
+  end
+;;
+
+
+let write_usascii s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p as US-ASCII at offset k_out of the output
+   * area (l_out bytes of s_out starting at p_out). Code points > 127 are
+   * replaced by the string 'w p'. c_out is the number of characters
+   * written so far.
+   * Returns the number of bytes stored, or -1 if the area is full, the
+   * substitution does not fit, or max_chars characters are reached.
+   *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1 (* End-of-buffer indicator *)
+  else if p <= 127 then begin
+    s_out.[p_out + k_out] <- Char.chr p;
+    1
+  end
+  else begin
+    let subst = w p in
+    let l_subst = String.length subst in
+    if k_out + l_subst > l_out then
+      (* Not enough space: Stop this round of recoding *)
+      -1
+    else begin
+      String.blit subst 0 s_out (k_out + p_out) l_subst;
+      l_subst
+    end
+  end
+;;
+
+
+let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p in a single-byte character set at offset
+   * k_out of the output area (l_out bytes of s_out starting at p_out).
+   * The table from_unicode is indexed by the low 8 bits of p; its entries
+   * list the code points that can be represented. Other code points are
+   * replaced by the string 'w p'.
+   * Returns the number of bytes stored, or -1 if the area is full, the
+   * substitution does not fit, or max_chars characters are reached.
+   *)
+  if k_out >= l_out || c_out >= max_chars then
+    -1 (* End-of-buffer indicator *)
+  else begin
+    let local =
+      match Array.unsafe_get from_unicode (p land 255) with
+        Netmappings.U_nil -> -1
+      | Netmappings.U_single (p0,q0) -> if p0 = p then q0 else -1
+      | Netmappings.U_list l -> (try List.assoc p l with Not_found -> -1)
+    in
+    if local >= 0 then begin
+      s_out.[p_out + k_out] <- Char.chr local;
+      1
+    end
+    else begin
+      let subst = w p in
+      let l_subst = String.length subst in
+      if k_out + l_subst > l_out then
+        (* Not enough space: Stop this round of recoding *)
+        -1
+      else begin
+        String.blit subst 0 s_out (k_out + p_out) l_subst;
+        l_subst
+      end
+    end
+  end
+;;
+
+
+let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p as UTF-8 at offset k_out of the output area
+   * (l_out bytes of s_out starting at p_out). If is_java, the code point
+   * 0 is written as the two bytes 0xc0 0x80. The substitution function w
+   * is not used.
+   * Returns the number of bytes stored, or -1 if the character does not
+   * fit or max_chars characters (c_out counts them) are reached.
+   * Fails for surrogates, 0xfffe, 0xffff, and code points > 0x10ffff.
+   *)
+  let room width = k_out + width <= l_out && c_out < max_chars in
+  let put i code = s_out.[p_out + k_out + i] <- Char.chr code in
+  let tail shift = 0x80 lor ((p lsr shift) land 0x3f) in
+  if p <= 127 && (not is_java || p <> 0) then begin
+    if room 1 then begin
+      put 0 p;
+      1
+    end
+    else -1
+  end
+  else if p <= 0x7ff then begin
+    if room 2 then begin
+      put 0 (0xc0 lor (p lsr 6));
+      put 1 (tail 0);
+      2
+    end
+    else -1
+  end
+  else if p <= 0xffff then begin
+    (* Refuse writing surrogate pairs, and fffe, ffff *)
+    if (p >= 0xd800 && p < 0xe000) || p >= 0xfffe then
+      failwith "Netconversion.write_utf8";
+    if room 3 then begin
+      put 0 (0xe0 lor (p lsr 12));
+      put 1 (tail 6);
+      put 2 (tail 0);
+      3
+    end
+    else -1
+  end
+  else if p <= 0x10ffff then begin
+    if room 4 then begin
+      put 0 (0xf0 lor (p lsr 18));
+      put 1 (tail 12);
+      put 2 (tail 6);
+      put 3 (tail 0);
+      4
+    end
+    else -1
+  end
+  else
+    (* Higher code points are not possible in XML: *)
+    failwith "Netconversion.write_utf8"
+;;
+
+
+let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p as little endian UTF-16 at offset k_out of
+   * the output area (l_out bytes of s_out starting at p_out). The
+   * substitution function w is not used.
+   * Returns the number of bytes stored (2, or 4 for a surrogate pair),
+   * or -1 if the character does not fit or max_chars characters (c_out
+   * counts them) are reached.
+   * Fails for 0xfffe, 0xffff, and code points > 0x10ffff.
+   *)
+  if p >= 0xfffe then begin
+    if p <= 0xffff || p > 0x10ffff then
+      failwith "Netconversion.write_utf16_le";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      (* The pair encodes the 20 bit number p - 0x10000: the upper 10 bits
+       * go into the high surrogate, the lower 10 bits into the low one.
+       *)
+      let p' = p - 0x10000 in
+      let high = (p' lsr 10) + 0xd800 in
+      let low = (p' land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out ] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out ] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
+  (* Stores the code point p as big endian UTF-16 at offset k_out of the
+   * output area (l_out bytes of s_out starting at p_out). The
+   * substitution function w is not used.
+   * Returns the number of bytes stored (2, or 4 for a surrogate pair),
+   * or -1 if the character does not fit or max_chars characters (c_out
+   * counts them) are reached.
+   * Fails for 0xfffe, 0xffff, and code points > 0x10ffff.
+   *)
+  if p >= 0xfffe then begin
+    if p <= 0xffff || p > 0x10ffff then
+      failwith "Netconversion.write_utf16_be";
+    (* Must be written as surrogate pair *)
+    if k_out + 3 < l_out && c_out < max_chars then begin
+      (* The pair encodes the 20 bit number p - 0x10000: the upper 10 bits
+       * go into the high surrogate, the lower 10 bits into the low one.
+       *)
+      let p' = p - 0x10000 in
+      let high = (p' lsr 10) + 0xd800 in
+      let low = (p' land 0x3ff) + 0xdc00 in
+      s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
+      s_out.[p_out + k_out ] <- Char.chr (high lsr 8);
+      s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
+      s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
+      4
+    end
+    else -1
+  end
+  else begin
+    if k_out + 1 < l_out && c_out < max_chars then begin
+      s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
+      s_out.[p_out + k_out ] <- Char.chr (p lsr 8);
+      2
+    end
+    else
+      -1
+  end
+;;
+
+
+let recode ~in_enc
+           ~in_buf
+           ~in_pos
+           ~in_len
+           ~out_enc
+           ~out_buf
+           ~out_pos
+           ~out_len
+           ~max_chars
+           ~subst =
+  (* Converts the in_len bytes of in_buf starting at in_pos from in_enc
+   * to out_enc, and stores the result in the out_len bytes of out_buf
+   * starting at out_pos. A reader for in_enc is combined with a writer
+   * for out_enc; the result is the result of the reader: (number of
+   * bytes read, number of bytes written, encoding to continue with).
+   * Raises Invalid_argument if one of the ranges is not valid. Fails if
+   * out_enc is `Enc_utf16, or if no mapping table is available for an
+   * encoding.
+   *)
+  let outside buf pos len =
+    pos < 0 || len < 0 || pos + len > String.length buf in
+  if outside in_buf in_pos in_len || outside out_buf out_pos out_len then
+    invalid_arg "Netconversion.recode";
+
+  let unsupported enc =
+    failwith("Support for the encoding `" ^
+             string_of_encoding enc ^
+             "' has not been compiled into Netstring")
+  in
+  let mapping_table tables enc =
+    (* Looks up the (lazy) table for enc, and forces it while the lock of
+     * Netmappings is held. Raises Not_found if there is no such table.
+     *)
+    let delayed = Hashtbl.find tables enc in
+    Netmappings.lock();
+    let table = Lazy.force delayed in
+    Netmappings.unlock();
+    table
+  in
+
+  let reader =
+    match in_enc with
+      `Enc_iso88591 -> read_iso88591
+    | `Enc_usascii -> read_usascii
+    | `Enc_utf8 -> read_utf8 false
+    | `Enc_java -> read_utf8 true
+    | `Enc_utf16 -> read_utf16
+    | `Enc_utf16_le -> read_utf16_le 0
+    | `Enc_utf16_be -> read_utf16_be 0
+    | _ ->
+        (try
+           read_8bit (mapping_table Netmappings.to_unicode in_enc) in_enc
+         with
+           Not_found -> unsupported in_enc
+        )
+  in
+  (* All writers take the description of the output area first: *)
+  let target write = write out_buf out_pos out_len max_chars subst in
+  let writer =
+    match out_enc with
+      `Enc_iso88591 -> target write_iso88591
+    | `Enc_usascii -> target write_usascii
+    | `Enc_utf8 -> target (write_utf8 false)
+    | `Enc_java -> target (write_utf8 true)
+    | `Enc_utf16 -> failwith "Netconversion.recode"
+    | `Enc_utf16_le -> target write_utf16_le
+    | `Enc_utf16_be -> target write_utf16_be
+    | _ ->
+        (try
+           target (write_8bit (mapping_table Netmappings.from_unicode out_enc))
+         with
+           Not_found -> unsupported out_enc
+        )
+  in
+  reader writer in_buf in_pos in_len
+;;
+
+
+let makechar enc p =
+  (* Returns the string that represents the code point p in the encoding
+   * enc. Raises Not_found if p cannot be represented in a single-byte
+   * encoding. Fails for `Enc_utf16, and if the mapping table for enc is
+   * not available.
+   *)
+  let unrepresentable _ = raise Not_found in
+  let via write =
+    (* Let 'write' store p into a scratch string of 4 bytes (enough for
+     * one character), and return the bytes that have been stored.
+     *)
+    let s = String.create 4 in
+    let n = write s 0 4 1 unrepresentable p 0 0 in
+    String.sub s 0 n
+  in
+  match enc with
+    `Enc_iso88591 ->
+      if p > 255 then raise Not_found;
+      String.make 1 (Char.chr p)
+  | `Enc_usascii ->
+      if p > 127 then raise Not_found;
+      String.make 1 (Char.chr p)
+  | `Enc_utf8 -> via (write_utf8 false)
+  | `Enc_java -> via (write_utf8 true)
+  | `Enc_utf16_le -> via write_utf16_le
+  | `Enc_utf16_be -> via write_utf16_be
+  | `Enc_utf16 ->
+      failwith "Netconversion.makechar"
+  | _ ->
+      let delayed =
+        try
+          Hashtbl.find Netmappings.from_unicode enc
+        with
+          Not_found ->
+            failwith("Support for the encoding `" ^
+                     string_of_encoding enc ^
+                     "' has not been compiled into Netstring")
+      in
+      Netmappings.lock();
+      let from_unicode = Lazy.force delayed in
+      Netmappings.unlock();
+      let s = String.create 1 in
+      (* Either the single byte is stored, or Not_found is raised: *)
+      ignore (write_8bit from_unicode s 0 1 1 unrepresentable p 0 0);
+      s
+;;
+
+
+let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
+  (* Converts the whole string s from in_enc to out_enc, and returns the
+   * converted string. The conversion is done in pieces using a buffer of
+   * 1024 bytes; every round continues with the encoding that the
+   * previous round has reported.
+   * Raises Malformed_code if a round cannot consume any input although
+   * input is left. This is the case if s ends in the middle of a
+   * multi-byte character; without this check the loop would never
+   * terminate. (A substitution that does not fit into the buffer is
+   * reported in the same way.)
+   *)
+  let length = String.length s in
+  let size = 1024 in
+  let out_buf = String.create size in
+
+  let rec recode_loop k s_done in_enc =
+    (* 'k' bytes of 's' have already been processed, and the result is in
+     * 's_done' (in reverse order).
+     *)
+    (* Recode to 'out_buf': *)
+    let in_len = length - k in
+    let in_done, out_done, in_enc' =
+      recode ~in_enc:in_enc ~in_buf:s ~in_pos:k ~in_len:in_len
+             ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0 ~out_len:size
+             ~max_chars:size ~subst:subst in
+    (* Collect the results: *)
+    let k' = k + in_done in
+    let s_done' = String.sub out_buf 0 out_done :: s_done in
+    if k' >= length then
+      (* Nothing left to do: Concatenate s_done' to get the final result. *)
+      String.concat "" (List.rev s_done')
+    else if in_done = 0 then
+      (* No progress: the next round would be in the same situation. *)
+      raise Malformed_code
+    else
+      recode_loop k' s_done' in_enc'
+  in
+
+  recode_loop 0 [] in_enc
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:46:41 gerd
+ * New type for the Unicode to 8 bit translation table.
+ * The Netmappings tables are now Lazy.t.
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.ml):
+ *
+ * Revision 1.5 2000/07/27 00:41:14 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.4 2000/07/04 22:11:41 gerd
+ * Implemented the enhancements and extensions of
+ * rev. 1.4 of pxp_encoding.mli.
+ *
+ * Revision 1.3 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+exception Malformed_code (* raised when the input is not valid in the encoding it is read in *)
+
+(* Encodings:
+ * - With the exception of UTF-8 and UTF-16, only single-byte character sets
+ * are supported.
+ * - I took the mappings from www.unicode.org, and the standard names of
+ * the character sets from IANA. Obviously, many character sets are missing
+ * that can be supported; especially ISO646 character sets, many EBCDIC
+ * code pages.
+ * - Because of the copyright statement from Unicode, I cannot put the
+ * source tables that describe the mappings into the distribution. They
+ * are publicly available from www.unicode.org.
+ * - Because of this, it is difficult for you to extend the list of character
+ * sets; you need the source tables I am not allowed to distribute.
+ * These tables have a very simple format: Every line describes a pair
+ * of code points; the left code (<= 0xff) is the code in the character
+ * set, the right code (<= 0xffff) is the Unicode equivalent.
+ * For an example, see
+ * http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT
+ * You can send me such files, and I will integrate them into the
+ * distribution (if possible).
+ * - I really do not know very much about the character sets used in
+ * East Asia. If you need them, please write the necessary conversion
+ * functions and send them to me.
+ *
+ * KNOWN PROBLEMS:
+ * - The following charsets do not have a bijective mapping to Unicode:
+ * adobe_standard_encoding, adobe_symbol_encoding,
+ * adobe_zapf_dingbats_encoding, cp1002 (0xFEBE). The current implementation
+ * simply removes one of the conflicting code point pairs - this might
+ * not be what you want.
+ *)
+
+type encoding =
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_java (* The variant of UTF-8 used by Java *)
+ | `Enc_utf16 (* UTF-16 with unspecified endianness (restricted usage) *)
+ | `Enc_utf16_le (* UTF-16 little endian *)
+ | `Enc_utf16_be (* UTF-16 big endian *)
+ | `Enc_usascii (* US-ASCII (only 7 bit) *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ | `Enc_iso88592 (* ISO-8859-2 *)
+ | `Enc_iso88593 (* ISO-8859-3 *)
+ | `Enc_iso88594 (* ISO-8859-4 *)
+ | `Enc_iso88595 (* ISO-8859-5 *)
+ | `Enc_iso88596 (* ISO-8859-6 *)
+ | `Enc_iso88597 (* ISO-8859-7 *)
+ | `Enc_iso88598 (* ISO-8859-8 *)
+ | `Enc_iso88599 (* ISO-8859-9 *)
+ | `Enc_iso885910 (* ISO-8859-10 *)
+ | `Enc_iso885913 (* ISO-8859-13 *)
+ | `Enc_iso885914 (* ISO-8859-14 *)
+ | `Enc_iso885915 (* ISO-8859-15 *)
+ | `Enc_koi8r (* KOI8-R *)
+ | `Enc_jis0201 (* JIS-0201 *)
+ (* Microsoft: *)
+ | `Enc_windows1250 (* WINDOWS-1250 *)
+ | `Enc_windows1251 (* WINDOWS-1251 *)
+ | `Enc_windows1252 (* WINDOWS-1252 *)
+ | `Enc_windows1253 (* WINDOWS-1253 *)
+ | `Enc_windows1254 (* WINDOWS-1254 *)
+ | `Enc_windows1255 (* WINDOWS-1255 *)
+ | `Enc_windows1256 (* WINDOWS-1256 *)
+ | `Enc_windows1257 (* WINDOWS-1257 *)
+ | `Enc_windows1258 (* WINDOWS-1258 *)
+ (* IBM, ASCII-based: *)
+ | `Enc_cp437
+ | `Enc_cp737
+ | `Enc_cp775
+ | `Enc_cp850
+ | `Enc_cp852
+ | `Enc_cp855
+ | `Enc_cp856
+ | `Enc_cp857
+ | `Enc_cp860
+ | `Enc_cp861
+ | `Enc_cp862
+ | `Enc_cp863
+ | `Enc_cp864
+ | `Enc_cp865
+ | `Enc_cp866
+ | `Enc_cp869
+ | `Enc_cp874
+ | `Enc_cp1006
+ (* IBM, EBCDIC-based: *)
+ | `Enc_cp037
+ | `Enc_cp424
+ | `Enc_cp500
+ | `Enc_cp875
+ | `Enc_cp1026
+ (* Adobe: *)
+ | `Enc_adobe_standard_encoding
+ | `Enc_adobe_symbol_encoding
+ | `Enc_adobe_zapf_dingbats_encoding
+ (* Apple: *)
+ | `Enc_macroman (* known under the name MACINTOSH *)
+
+ ]
+
+
+val encoding_of_string : string -> encoding;;
+ (* Returns the encoding that is denoted by the passed name. Fails if the
+ * name does not denote a known encoding.
+ * E.g. encoding_of_string "iso-8859-1" = `Enc_iso88591
+ *)
+
+val string_of_encoding : encoding -> string;;
+ (* Returns the name of the passed encoding as a string. *)
+
+
+val makechar : encoding -> int -> string
+ (* makechar enc i:
+ * Creates the string that represents the code point i in the encoding
+ * enc.
+ * Raises Not_found if the character is legal but cannot be represented
+ * in enc.
+ *
+ * Possible encodings: everything but `Enc_utf16.
+ *)
+
+val recode : in_enc:encoding ->
+ in_buf:string ->
+ in_pos:int ->
+ in_len:int ->
+ out_enc:encoding ->
+ out_buf:string ->
+ out_pos:int ->
+ out_len:int ->
+ max_chars:int ->
+ subst:(int -> string) -> (int * int * encoding)
+ (*
+ * let (in_n, out_n, in_enc') =
+ * recode in_enc in_buf in_pos in_len out_enc out_buf out_pos out_len
+ * max_chars subst:
+ * Converts the character sequence contained in the at most in_len bytes
+ * of in_buf starting at position in_pos, and writes the result
+ * into at most out_len bytes of out_buf starting at out_pos.
+ * At most max_chars characters are written into out_buf.
+ * The characters in in_buf are assumed to be encoded as in_enc, and the
+ * characters in out_buf will be encoded as out_enc.
+ * If there is a code point which cannot be represented in out_enc,
+ * the function subst is called with the code point as argument, and the
+ * resulting string (which must already be encoded as out_enc) is
+ * inserted instead.
+ * Note: It is possible that subst is called several times for the same
+ * character.
+ * Return value: out_n is the actual number of bytes written into out_buf.
+ * in_n is the actual number of bytes that have been converted from
+ * in_buf; in_n may be smaller than in_len because of incomplete
+ * multi-byte characters, or because the output buffer has less space
+ * for characters than the input buffer, or because of a change
+ * of the encoding variant.
+ * If there is at least one complete character in in_buf, and at least
+ * space for one complete character in out_buf, and max_chars >= 1, it is
+ * guaranteed that in_n > 0 or out_n > 0.
+ * in_enc' is normally identical to in_enc. However, there are cases
+ * in which the encoding can be refined when looking at the byte
+ * sequence; for example whether a little endian or big endian variant
+ * of the encoding is used. in_enc' is the variant of in_enc that was
+ * used for the last character that has been converted.
+ *
+ * NOTES:
+ *
+ * Supported range of code points: 0 to 0xd7ff, 0xe000 to 0xfffd,
+ * 0x10000 to 0x10ffff.
+ *
+ * Enc_utf8: Malformed UTF-8 byte sequences are always rejected. This
+ * is also true for the sequence 0xc0 0x80 which is used by some software
+ * (Java) as paraphrase for the code point 0.
+ *
+ * Enc_utf16: When reading from a string encoded as Enc_utf16, a byte
+ * order mark is expected at the beginning. The detected variant
+ * (Enc_utf16_le or Enc_utf16_be) is returned. The byte order mark is
+ * not included into the output string. - It is not possible to
+ * write as Enc_utf16.
+ *
+ * Enc_utf16_le, Enc_utf16_be: When reading from such a string, the
+ * code point 0xfeff is returned as it is; it is a "zero-width
+ * non-breaking space". The code point 0xfffe is rejected.
+ *
+ * Surrogate pairs: These are recognized (or written) only for a
+ * UTF-16 encoding; and rejected for any other encoding.
+ *
+ * Rejected byte sequences cause the exception Bad_character_stream.
+ * NOTE(review): Bad_character_stream is not declared in this interface;
+ * the only exception declared here is Malformed_code. Presumably that
+ * is the exception actually raised - confirm against the implementation.
+ *)
+
+val recode_string : in_enc:encoding ->
+ out_enc:encoding ->
+ ?subst:(int -> string) ->
+ string ->
+ string
+ (* Recodes a complete string from in_enc to out_enc, and returns the
+ * recoded string.
+ * The function subst is invoked for code points of in_enc that cannot
+ * be represented in out_enc, and the result of the function invocation
+ * is substituted.
+ * If the subst argument is omitted, Not_found is raised for such code
+ * points.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.mli):
+ *
+ * Revision 1.4 2000/07/04 22:05:58 gerd
+ * Enhanced version of 'recode'. Labeled arguments.
+ * New function 'recode_string'.
+ *
+ * Revision 1.3 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+module Str = Netstring_str;;
+
+module Base64 = struct
+ let b64_pattern plus slash =
+   (* Builds a fresh 64-entry Base 64 alphabet: the letters A-Z, a-z and
+    * the digits 0-9 occupy the indexes 0 to 61; the two caller-supplied
+    * characters plus and slash are put at the indexes 62 and 63.
+    *)
+   let common =
+     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" in
+   Array.init 64
+     (fun i ->
+        if i < 62 then common.[i]
+        else if i = 62 then plus
+        else slash);;
+
+
+ (* The standard alphabet: '+' is the character for 62, '/' for 63. *)
+ let rfc_pattern = b64_pattern '+' '/';;
+ (* The alphabet of the URL variant: '-' is used for 62; '/' is kept. *)
+ let url_pattern = b64_pattern '-' '/';;
+
+ let encode_with_options b64 equal s pos len linelen crlf =
+   (* The Base 64 encoder behind all public encoding functions.
+    * b64: the 64-entry alphabet, as created by b64_pattern.
+    * equal: the padding character; pass '=' for the original scheme.
+    * s, pos, len: the substring of s to encode.
+    * linelen: maximum line length; it is rounded down to a multiple of 4,
+    *   and values below 4 turn line division off.
+    * crlf: end lines with CR LF instead of a single LF.
+    * Raises Invalid_argument if len, pos or linelen is negative, or if the
+    * substring does not lie within s.
+    *)
+   assert (Array.length b64 = 64);
+   let s_len = String.length s in
+   if len < 0 || pos < 0 || pos > s_len || linelen < 0 then
+     invalid_arg "Netencoding.Base64.encode_with_options";
+   if pos + len > s_len then
+     invalid_arg "Netencoding.Base64.encode_with_options";
+
+   (* Only complete quartets are put on a line: *)
+   let linelen = (linelen / 4) * 4 in
+   let split_lines = linelen > 3 in
+
+   (* raw_len: length of the result without any line endings *)
+   let raw_len = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
+
+   (* total_len: length of the result including the LF or CRLF sequences *)
+   let total_len =
+     if not split_lines || raw_len = 0 then
+       raw_len
+     else
+       let n_lines = ((raw_len - 1) / linelen) + 1 in
+       raw_len + n_lines * (if crlf then 2 else 1)
+   in
+
+   (* The buffer is pre-filled with the padding character, so the padding
+    * positions of the last quartet need not be written explicitly.
+    *)
+   let out = String.make total_len equal in
+   let w = ref 0 in      (* write position in out *)
+   let col = ref 0 in    (* characters already on the current line *)
+
+   let put_eol () =
+     if crlf then begin
+       out.[ !w ] <- '\013';
+       out.[ !w + 1 ] <- '\010';
+       w := !w + 2
+     end
+     else begin
+       out.[ !w ] <- '\010';
+       incr w
+     end
+   in
+
+   (* Complete triples: three input bytes become four output characters. *)
+   for k = 0 to len / 3 - 1 do
+     let p = pos + 3*k in
+     (* Because k <= len/3 - 1 we have p+2 <= pos + len - 1, and
+      * pos + len <= String.length s was checked above. So the unchecked
+      * reads below stay inside s.
+      *)
+     let b0 = Char.code (String.unsafe_get s p) in
+     let b1 = Char.code (String.unsafe_get s (p+1)) in
+     let b2 = Char.code (String.unsafe_get s (p+2)) in
+     let bits = (b0 lsl 16) lor (b1 lsl 8) lor b2 in
+     (* bits is a 24 bit entity, i.e. all four indexes below are < 64 *)
+     assert (!w + 3 < total_len);
+     String.unsafe_set out !w (Array.unsafe_get b64 (bits lsr 18));
+     String.unsafe_set out (!w+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
+     String.unsafe_set out (!w+2) (Array.unsafe_get b64 ((bits lsr 6) land 63));
+     String.unsafe_set out (!w+3) (Array.unsafe_get b64 (bits land 63));
+     w := !w + 4;
+     if split_lines then begin
+       col := !col + 4;
+       (* Break the line if the next quartet would not fit on it: *)
+       if !col + 4 > linelen then begin
+         put_eol();
+         col := 0
+       end
+     end
+   done;
+
+   (* The remaining one or two bytes form a padded quartet: *)
+   begin match len mod 3 with
+     0 -> ()
+   | 1 ->
+       let bits = Char.code (s.[pos + len - 1]) in
+       out.[ !w ] <- b64.( bits lsr 2 );
+       out.[ !w + 1 ] <- b64.( (bits land 0x03) lsl 4 );
+       w := !w + 4;
+       col := !col + 4
+   | 2 ->
+       let hi = Char.code (s.[pos + len - 2]) in
+       let lo = Char.code (s.[pos + len - 1]) in
+       let bits = (hi lsl 8) lor lo in
+       out.[ !w ] <- b64.( bits lsr 10 );
+       out.[ !w + 1 ] <- b64.( (bits lsr 4) land 0x3f );
+       out.[ !w + 2 ] <- b64.( (bits lsl 2) land 0x3f );
+       w := !w + 4;
+       col := !col + 4
+   | _ -> assert false
+   end;
+
+   (* A partially filled last line gets a line ending, too: *)
+   if split_lines && !col > 0 then put_eol();
+
+   out ;;
+
+
+
+ let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+   (* Standard Base 64 encoding of s, or of the substring selected by pos
+    * and len; a missing len means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   encode_with_options rfc_pattern '=' s pos effective_len linelength crlf;;
+
+
+ let encode_substring s ~pos ~len ~linelength ~crlf =
+ (* Variant with mandatory labelled arguments: encodes the substring of s
+ * given by pos and len with the standard alphabet (rfc_pattern) and '='
+ * as padding character.
+ *)
+ encode_with_options rfc_pattern '=' s pos len linelength crlf;;
+
+
+ let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
+   (* Like encode, but with the URL alphabet (url_pattern) and '.' as
+    * padding character; a missing len means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   encode_with_options url_pattern '.' s pos effective_len linelength crlf;;
+
+
+ let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
+   (* decode_substring t pos len url_variant accept_spaces:
+    * Decodes the Base 64 text contained in the substring of t that begins
+    * at pos and has length len, and returns the decoded bytes.
+    * url_variant: additionally accept '-' (for 62) and '.' (as padding).
+    * accept_spaces: skip blanks, TABs, CRs and LFs; if false, such
+    *   characters are errors.
+    * Raises Invalid_argument if the substring does not lie within t, or
+    * if the text is not well-formed.
+    *)
+   let fail () = invalid_arg "Netencoding.Base64.decode_substring" in
+
+   if len < 0 || pos < 0 || pos > String.length t then fail();
+   if pos + len > String.length t then fail();
+
+   (* l_t: the number of effective (non-whitespace) characters;
+    * pad_chars: the number of padding characters at the end.
+    *)
+   let l_t, pad_chars =
+     if p_spaces then begin
+       let n_chars = ref 0 in
+       let n_pad = ref 0 in
+       for i = pos to pos + len - 1 do
+         match String.unsafe_get t i with
+           (' '|'\t'|'\r'|'\n') -> ()
+         | ('='|'.') as ch ->
+             if ch = '.' && not p_url then fail();
+             incr n_chars;
+             incr n_pad;
+             if !n_pad > 2 then fail();
+             (* After a padding character only further padding or white
+              * space may follow:
+              *)
+             for j = i+1 to pos + len - 1 do
+               match String.unsafe_get t j with
+                 (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
+               | _ -> fail()
+             done
+         | _ -> incr n_chars
+       done;
+       if !n_chars mod 4 <> 0 then fail();
+       (!n_chars, !n_pad)
+     end
+     else begin
+       if len mod 4 <> 0 then fail();
+       let n_pad =
+         if len > 0 then begin
+           (* len is a positive multiple of 4, so both indexes are valid.
+            * The padding must be looked up relative to pos; the end of
+            * the substring is at pos + len, not at len.
+            *)
+           let before_last = t.[pos + len - 2] in
+           let last = t.[pos + len - 1] in
+           if (before_last = '=' && last = '=') ||
+              (p_url && before_last = '.' && last = '.') then 2
+           else if last = '=' || (p_url && last = '.') then 1
+           else 0
+         end
+         else 0
+       in
+       (len, n_pad)
+     end
+   in
+
+   let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *)
+   let s = String.create l_s in
+
+   let decode_char c =
+     match c with
+       'A' .. 'Z' -> Char.code c - 65 (* 65 = Char.code 'A' *)
+     | 'a' .. 'z' -> Char.code c - 71 (* 71 = Char.code 'a' - 26 *)
+     | '0' .. '9' -> Char.code c + 4 (* -4 = Char.code '0' - 52 *)
+     | '+' -> 62
+     | '-' -> if not p_url then fail(); 62
+     | '/' -> 63
+     | _ -> fail()
+   in
+
+   (* Decodes one complete quartet and stores the three bytes at s.[q],
+    * s.[q+1], s.[q+2]. Only used for quartets that are not the last one,
+    * so q+2 < l_s always holds.
+    *)
+   let store q c0 c1 c2 c3 =
+     let n0 = decode_char c0 in
+     let n1 = decode_char c1 in
+     let n2 = decode_char c2 in
+     let n3 = decode_char c3 in
+     let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+     let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+     let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+     String.unsafe_set s q (Char.chr x0);
+     String.unsafe_set s (q+1) (Char.chr x1);
+     String.unsafe_set s (q+2) (Char.chr x2)
+   in
+
+   (* next_char: returns the next effective character and advances the
+    * cursor; white space is skipped or rejected depending on p_spaces.
+    *)
+   let cursor = ref pos in
+   let rec next_char() =
+     match t.[ !cursor ] with
+       (' '|'\t'|'\r'|'\n') ->
+         if p_spaces then (incr cursor; next_char())
+         else fail()
+     | c ->
+         incr cursor; c
+   in
+
+   (* Decode all but the last quartet: *)
+
+   if p_spaces then begin
+     for k = 0 to l_t / 4 - 2 do
+       let c0 = next_char() in
+       let c1 = next_char() in
+       let c2 = next_char() in
+       let c3 = next_char() in
+       store (3*k) c0 c1 c2 c3
+     done
+   end
+   else begin
+     (* Much faster: the characters can be addressed directly *)
+     for k = 0 to l_t / 4 - 2 do
+       let p = pos + 4*k in
+       store
+         (3*k)
+         (String.unsafe_get t p)
+         (String.unsafe_get t (p + 1))
+         (String.unsafe_get t (p + 2))
+         (String.unsafe_get t (p + 3))
+     done;
+     cursor := pos + l_t - 4
+   end;
+
+   (* Decode the last quartet; it is the only one that may be padded: *)
+
+   if l_t > 0 then begin
+     let q = 3*(l_t / 4 - 1) in
+     let c0 = next_char() in
+     let c1 = next_char() in
+     let c2 = next_char() in
+     let c3 = next_char() in
+
+     if (c2 = '=' && c3 = '=') || (p_url && c2 = '.' && c3 = '.') then begin
+       (* Two padding characters: one byte *)
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       s.[ q ] <- Char.chr x0
+     end
+     else if (c3 = '=') || (p_url && c3 = '.') then begin
+       (* One padding character: two bytes *)
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let n2 = decode_char c2 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+       s.[ q ] <- Char.chr x0;
+       s.[ q+1 ] <- Char.chr x1
+     end
+     else begin
+       (* No padding: three bytes *)
+       let n0 = decode_char c0 in
+       let n1 = decode_char c1 in
+       let n2 = decode_char c2 in
+       let n3 = decode_char c3 in
+       let x0 = (n0 lsl 2) lor (n1 lsr 4) in
+       let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
+       let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
+       s.[ q ] <- Char.chr x0;
+       s.[ q+1 ] <- Char.chr x1;
+       s.[ q+2 ] <- Char.chr x2
+     end
+   end;
+
+   s ;;
+
+
+
+ let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
+   (* Decodes s, or the substring selected by pos and len; a missing len
+    * means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   decode_substring s pos effective_len url_variant accept_spaces;;
+
+ let decode_ignore_spaces s =
+ (* Decodes the whole string s with both url_variant and accept_spaces
+ * switched on.
+ *)
+ decode_substring s 0 (String.length s) true true;;
+
+
+end
+
+
+
+module QuotedPrintable = struct
+
+ let encode_substring s ~pos ~len =
+   (* Encodes the substring of s beginning at pos with length len as
+    * quoted-printable text.
+    * CR and LF are copied literally. Control characters, the characters
+    * with codes >= 127, a set of characters regarded as unsafe, and '='
+    * are written as hex tokens =XY. A space is written as =20 only if it
+    * is followed by CR or LF, or if it is the last character.
+    * No soft line breaks are inserted.
+    * Raises Invalid_argument if the substring does not lie within s.
+    *)
+   if len < 0 || pos < 0 || pos > String.length s then
+     invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+   if pos + len > String.length s then
+     invalid_arg "Netencoding.QuotedPrintable.encode_substring";
+
+   (* quoted i: whether the character at the relative index i (0 <= i < len)
+    * must be written as hex token. Both passes below use this single
+    * classification, so the computed length always matches the output.
+    *)
+   let quoted i =
+     match String.unsafe_get s (pos+i) with
+       ('\r'|'\n') ->
+         false
+     | ('\000'..'\031'|'\127'..'\255'|
+        '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
+         true
+     | ' ' ->
+         (* Protect spaces only if they occur at the end of a line *)
+         if i+1 < len then
+           (match s.[pos+i+1] with
+              ('\r'|'\n') -> true
+            | _ -> false)
+         else
+           true
+     | _ ->
+         false
+   in
+
+   (* First pass: compute the length of the result *)
+   let rec count n i =
+     if i < len then
+       count (if quoted i then n+3 else n+1) (i+1)
+     else
+       n
+   in
+
+   let t = String.create (count 0 0) in
+
+   let hexdigit =
+     [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+        '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |] in
+
+   (* Second pass: fill the result. The character must be read at pos+i;
+    * reading at i would encode the wrong part of s whenever pos > 0.
+    *)
+   let k = ref 0 in
+   for i = 0 to len - 1 do
+     let c = String.unsafe_get s (pos+i) in
+     if quoted i then begin
+       t.[ !k ] <- '=';
+       t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
+       t.[ !k+2 ] <- hexdigit.( Char.code c land 15 );
+       k := !k + 3
+     end
+     else begin
+       String.unsafe_set t !k c;
+       incr k
+     end
+   done;
+
+   t ;;
+
+
+ let encode ?(pos=0) ?len s =
+   (* Encodes s, or the substring selected by pos and len; a missing len
+    * means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   encode_substring s pos effective_len;;
+
+
+
+ let decode_substring s ~pos ~len =
+   (* Decodes the quoted-printable substring of s beginning at pos with
+    * length len.
+    * Hex tokens =XY (upper or lower case digits) become single bytes.
+    * Soft line breaks - a '=' followed by CR LF, by a lone CR, or by a
+    * lone LF - and a '=' that is the very last character are dropped.
+    * Everything else is copied literally.
+    * Raises Invalid_argument if the substring does not lie within s, or
+    * if a '=' is followed by something that is neither a line break nor
+    * a pair of hex digits.
+    *)
+   let bad () =
+     invalid_arg "Netencoding.QuotedPrintable.decode_substring" in
+
+   let s_len = String.length s in
+   if len < 0 || pos < 0 || pos > s_len then bad();
+   if pos + len > s_len then bad();
+
+   let decode_hex c =
+     match c with
+       '0'..'9' -> Char.code c - Char.code '0'
+     | 'A'..'F' -> Char.code c - Char.code 'A' + 10
+     | 'a'..'f' -> Char.code c - Char.code 'a' + 10
+     | _ -> bad()
+   in
+
+   (* First pass: validate the input and compute the decoded length. *)
+   let rec count n i =
+     if i >= len then
+       n
+     else if String.unsafe_get s (pos+i) <> '=' then
+       count (n+1) (i+1)
+     else if i+1 = len then
+       (* A '=' at EOF is ignored *)
+       count n (i+1)
+     else if i+1 < len then begin
+       match s.[pos+i+1] with
+         '\r' ->
+           (* Official soft break; a CR without LF is tolerated *)
+           if i+2 < len && s.[pos+i+2] = '\n' then
+             count n (i+3)
+           else
+             count n (i+2)
+       | '\n' ->
+           (* Unofficial soft break *)
+           count n (i+2)
+       | _ ->
+           if i+2 >= len then bad();
+           let _ = decode_hex s.[pos+i+1] in
+           let _ = decode_hex s.[pos+i+2] in
+           count (n+1) (i+3)
+     end
+     else
+       bad()
+   in
+
+   (* Second pass: src walks over the input, dst over the output. *)
+   let out_len = count 0 0 in
+   let out = String.create out_len in
+   let stop = pos + len in
+   let src = ref pos in
+   let dst = ref 0 in
+
+   while !dst < out_len do
+     let c = String.unsafe_get s !src in
+     if c <> '=' then begin
+       String.unsafe_set out !dst c;
+       incr src;
+       incr dst
+     end
+     else if !src + 1 = stop then
+       (* A '=' at EOF is ignored. (The first pass counts no output
+        * after such a '=', so the loop has already ended before this
+        * position is reached.)
+        *)
+       ()
+     else if !src + 1 < stop then begin
+       match s.[!src + 1] with
+         '\r' ->
+           (* Official soft break; a CR without LF is tolerated *)
+           if !src + 2 < stop && s.[!src + 2] = '\n' then
+             src := !src + 3
+           else
+             src := !src + 2
+       | '\n' ->
+           (* Unofficial soft break *)
+           src := !src + 2
+       | _ ->
+           if !src + 2 >= stop then bad();
+           let x1 = decode_hex s.[!src + 1] in
+           let x2 = decode_hex s.[!src + 2] in
+           out.[ !dst ] <- Char.chr ((x1 lsl 4) lor x2);
+           src := !src + 3;
+           incr dst
+     end
+     else
+       bad()
+   done;
+
+   out ;;
+
+
+ let decode ?(pos=0) ?len s =
+   (* Decodes s, or the substring selected by pos and len; a missing len
+    * means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   decode_substring s pos effective_len;;
+
+end
+
+
+module Q = struct
+
+ let encode_substring s ~pos ~len =
+   (* Encodes the substring of s beginning at pos with length len using
+    * the Q encoding: letters and digits are copied literally, every other
+    * character (including space, CR and LF) is written as hex token =XY.
+    * Raises Invalid_argument if the substring does not lie within s.
+    *)
+   if len < 0 || pos < 0 || pos > String.length s then
+     invalid_arg "Netencoding.Q.encode_substring";
+   if pos + len > String.length s then
+     invalid_arg "Netencoding.Q.encode_substring";
+
+   (* is_plain c: whether c can be copied without quoting *)
+   let is_plain c =
+     match c with
+       ('A'..'Z'|'a'..'z'|'0'..'9') -> true
+     | _ -> false
+   in
+
+   (* First pass: compute the length of the result *)
+   let rec count n i =
+     if i < len then
+       count
+         (if is_plain (String.unsafe_get s (pos+i)) then n+1 else n+3)
+         (i+1)
+     else
+       n
+   in
+
+   let t = String.create (count 0 0) in
+
+   let hexdigit =
+     [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+        '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |] in
+
+   (* Second pass: fill the result. The character must be read at pos+i;
+    * reading at i would encode the wrong part of s whenever pos > 0.
+    *)
+   let k = ref 0 in
+   for i = 0 to len - 1 do
+     let c = String.unsafe_get s (pos+i) in
+     if is_plain c then begin
+       String.unsafe_set t !k c;
+       incr k
+     end
+     else begin
+       let code = Char.code c in
+       t.[ !k ] <- '=';
+       t.[ !k+1 ] <- hexdigit.( code lsr 4 );
+       t.[ !k+2 ] <- hexdigit.( code land 15 );
+       k := !k + 3
+     end
+   done;
+
+   t ;;
+
+
+ let encode ?(pos=0) ?len s =
+   (* Encodes s, or the substring selected by pos and len; a missing len
+    * means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   encode_substring s pos effective_len;;
+
+
+
+ let decode_substring s ~pos ~len =
+   (* Decodes the Q encoded substring of s beginning at pos with length
+    * len: hex tokens =XY (upper or lower case digits) become single bytes,
+    * underscores become spaces, all other characters are copied.
+    * Raises Invalid_argument if the substring does not lie within s, or
+    * if a '=' is not followed by two hex digits.
+    *)
+   let bad () = invalid_arg "Netencoding.Q.decode_substring" in
+
+   let s_len = String.length s in
+   if len < 0 || pos < 0 || pos > s_len then bad();
+   if pos + len > s_len then bad();
+
+   let decode_hex c =
+     match c with
+       '0'..'9' -> Char.code c - Char.code '0'
+     | 'A'..'F' -> Char.code c - Char.code 'A' + 10
+     | 'a'..'f' -> Char.code c - Char.code 'a' + 10
+     | _ -> bad()
+   in
+
+   (* First pass: validate the hex tokens and compute the decoded length.
+    * An underscore counts like any other literal character.
+    *)
+   let rec count n i =
+     if i >= len then
+       n
+     else if String.unsafe_get s (pos+i) = '=' then begin
+       if i+2 >= len then bad();
+       let _ = decode_hex s.[pos+i+1] in
+       let _ = decode_hex s.[pos+i+2] in
+       count (n+1) (i+3)
+     end
+     else
+       count (n+1) (i+1)
+   in
+
+   (* Second pass: src walks over the input, dst over the output. *)
+   let out_len = count 0 0 in
+   let out = String.create out_len in
+   let stop = pos + len in
+   let src = ref pos in
+   let dst = ref 0 in
+
+   while !dst < out_len do
+     match String.unsafe_get s !src with
+       '=' ->
+         if !src + 2 >= stop then bad();
+         let hi = decode_hex s.[!src + 1] in
+         let lo = decode_hex s.[!src + 2] in
+         out.[ !dst ] <- Char.chr ((hi lsl 4) lor lo);
+         src := !src + 3;
+         incr dst
+     | c ->
+         String.unsafe_set out !dst (if c = '_' then ' ' else c);
+         incr src;
+         incr dst
+   done;
+
+   out ;;
+
+
+ let decode ?(pos=0) ?len s =
+   (* Decodes s, or the substring selected by pos and len; a missing len
+    * means: up to the end of s.
+    *)
+   let effective_len =
+     match len with
+       Some n -> n
+     | None -> String.length s - pos
+   in
+   decode_substring s pos effective_len ;;
+
+end
+
+
+module Url = struct
+ (* The sixteen upper-case hexadecimal digits, indexed by their value. *)
+ let hex_digits =
+ [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
+ '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;
+
+ let to_hex2 k =
+   (* Returns the two upper-case hex digits for the low eight bits of k *)
+   let hi = hex_digits.( (k lsr 4) land 15 ) in
+   let lo = hex_digits.( k land 15 ) in
+   let s = String.create 2 in
+   s.[0] <- hi;
+   s.[1] <- lo;
+   s ;;
+
+
+ let of_hex1 c =
+   (* Returns the value (0 to 15) of the hex digit c; both upper and lower
+    * case letters are accepted. Raises Not_found for any other character.
+    *)
+   let code = Char.code c in
+   if c >= '0' && c <= '9' then code - 48        (* 48 = Char.code '0' *)
+   else if c >= 'A' && c <= 'F' then code - 55   (* 55 = Char.code 'A' - 10 *)
+   else if c >= 'a' && c <= 'f' then code - 87   (* 87 = Char.code 'a' - 10 *)
+   else raise Not_found ;;
+
+
+
+ (* Matches every single character that 'encode' has to replace, i.e.
+ * everything but letters, digits, and the characters $ _ . ! * ' ( ) , -
+ *)
+ let url_encoding_re =
+ Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;
+
+ (* Matches what 'decode' has to look at: a plus sign, or a percent sign
+ * together with up to two following characters.
+ *)
+ let url_decoding_re =
+ Str.regexp "\\+\\|%..\\|%.\\|%";;
+
+
+ let encode s =
+   (* URL-encodes s: a space becomes a plus sign, every other character
+    * matched by url_encoding_re becomes a percent sign followed by its
+    * two-digit hex code.
+    *)
+   let quote r _ =
+     let m = Str.matched_string r s in
+     if m = " " then
+       "+"
+     else
+       "%" ^ to_hex2 (Char.code m.[0])
+   in
+   Str.global_substitute url_encoding_re quote s ;;
+
+
+ let decode s =
+   (* Reverts 'encode': a plus sign becomes a space, and a percent sign
+    * followed by two hex digits becomes the denoted character.
+    * Fails (Failure) if a percent sign is not followed by two hex digits.
+    * NOTE(review): the failure message is "Cgi.decode", which looks like
+    * a leftover from another module - confirm before changing it.
+    *)
+   let l = String.length s in
+   let unquote r _ =
+     if Str.matched_string r s = "+" then
+       " "
+     else begin
+       let i = Str.match_beginning r in
+       (* Assertion: s.[i] = '%' *)
+       if i+2 >= l then failwith "Cgi.decode";
+       let c1 = s.[i+1] in
+       let c2 = s.[i+2] in
+       try
+         let k1 = of_hex1 c1 in
+         let k2 = of_hex1 c2 in
+         String.make 1 (Char.chr ((k1 lsl 4) lor k2))
+       with
+         Not_found -> failwith "Cgi.decode"
+     end
+   in
+   Str.global_substitute url_decoding_re unquote s ;;
+
+end
+
+
+module Html = struct
+
+ (* Matches an entity reference: an ampersand, then either a hash mark
+ * and decimal digits (group 2), or a name made of letters (group 3),
+ * then a semicolon.
+ *)
+ let eref_re =
+ Str.regexp
+ "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
+ (* Matches every single character that 'encode_from_latin1' replaces:
+ * < > & and the double quote, the codes 0-8, 11-12 and 14-31 (i.e. the
+ * control characters except 9, 10 and 13), and the codes 127-255.
+ *)
+ let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
+
+ (* The list of known entities: pairs of the entity name and the
+ * one-character string the entity stands for.
+ *)
+ let etable =
+ [ "lt", "<";
+ "gt", ">";
+ "amp", "&";
+ "quot", "\"";
+ (* Note: &quot; is new in HTML-4.0, but it has been widely used
+ * much earlier.
+ *)
+ "nbsp", "\160";
+ "iexcl", "\161";
+ "cent", "\162";
+ "pound", "\163";
+ "curren", "\164";
+ "yen", "\165";
+ "brvbar", "\166";
+ "sect", "\167";
+ "uml", "\168";
+ "copy", "\169";
+ "ordf", "\170";
+ "laquo", "\171";
+ "not", "\172";
+ "shy", "\173";
+ "reg", "\174";
+ "macr", "\175";
+ "deg", "\176";
+ "plusmn", "\177";
+ "sup2", "\178";
+ "sup3", "\179";
+ "acute", "\180";
+ "micro", "\181";
+ "para", "\182";
+ "middot", "\183";
+ "cedil", "\184";
+ "sup1", "\185";
+ "ordm", "\186";
+ "raquo", "\187";
+ "frac14", "\188";
+ "frac12", "\189";
+ "frac34", "\190";
+ "iquest", "\191";
+ "Agrave", "\192";
+ "Aacute", "\193";
+ "Acirc", "\194";
+ "Atilde", "\195";
+ "Auml", "\196";
+ "Aring", "\197";
+ "AElig", "\198";
+ "Ccedil", "\199";
+ "Egrave", "\200";
+ "Eacute", "\201";
+ "Ecirc", "\202";
+ "Euml", "\203";
+ "Igrave", "\204";
+ "Iacute", "\205";
+ "Icirc", "\206";
+ "Iuml", "\207";
+ "ETH", "\208";
+ "Ntilde", "\209";
+ "Ograve", "\210";
+ "Oacute", "\211";
+ "Ocirc", "\212";
+ "Otilde", "\213";
+ "Ouml", "\214";
+ "times", "\215";
+ "Oslash", "\216";
+ "Ugrave", "\217";
+ "Uacute", "\218";
+ "Ucirc", "\219";
+ "Uuml", "\220";
+ "Yacute", "\221";
+ "THORN", "\222";
+ "szlig", "\223";
+ "agrave", "\224";
+ "aacute", "\225";
+ "acirc", "\226";
+ "atilde", "\227";
+ "auml", "\228";
+ "aring", "\229";
+ "aelig", "\230";
+ "ccedil", "\231";
+ "egrave", "\232";
+ "eacute", "\233";
+ "ecirc", "\234";
+ "euml", "\235";
+ "igrave", "\236";
+ "iacute", "\237";
+ "icirc", "\238";
+ "iuml", "\239";
+ "eth", "\240";
+ "ntilde", "\241";
+ "ograve", "\242";
+ "oacute", "\243";
+ "ocirc", "\244";
+ "otilde", "\245";
+ "ouml", "\246";
+ "divide", "\247";
+ "oslash", "\248";
+ "ugrave", "\249";
+ "uacute", "\250";
+ "ucirc", "\251";
+ "uuml", "\252";
+ "yacute", "\253";
+ "thorn", "\254";
+ "yuml", "\255";
+ ] ;;
+
+ let quick_etable =
+   (* Hash table from entity names to their replacement strings; it is
+    * used for decoding.
+    *)
+   let tbl = Hashtbl.create 50 in
+   let register (name, value) = Hashtbl.add tbl name value in
+   List.iter register etable;
+   (* Entities to be decoded, but that must not be encoded: *)
+   register ("apos", "'");   (* used in XML documents *)
+   tbl ;;
+
+ let rev_etable =
+   (* Array indexed by character code; it contains the entity reference
+    * that replaces the character when encoding. Characters without
+    * replacement have the empty string.
+    *)
+   let a = Array.create 256 "" in
+   (* Named references for all characters of etable: *)
+   List.iter
+     (fun (name,value) ->
+        a.(Char.code(value.[0])) <- "&" ^ name ^ ";")
+     etable;
+   (* Numeric references for the code ranges that have no name: *)
+   let numeric first last =
+     for i = first to last do
+       a.(i) <- "&#" ^ string_of_int i ^ ";"
+     done
+   in
+   numeric 0 8;
+   numeric 11 12;
+   numeric 14 31;
+   numeric 127 159;
+   a ;;
+
+ let decode_to_latin1 s =
+   (* Replaces the entity references found in s by the characters they
+    * stand for:
+    * - Numeric references are replaced if the number is in the range
+    *   0 to 255.
+    * - Named references are replaced if the name is found in
+    *   quick_etable.
+    * All other references are left as they are. This includes numeric
+    * references whose number is too big to be converted by int_of_string;
+    * such a reference must not make the whole decoding fail.
+    *)
+   Str.global_substitute
+     eref_re
+     (fun r _ ->
+        let t = Str.matched_string r s in
+        try
+          let n = int_of_string (Str.matched_group r 2 s) in
+          (* The check n >= 0 protects Char.chr in case an overlong number
+           * has been wrapped to a negative value.
+           *)
+          if n >= 0 && n < 256 then
+            String.make 1 (Char.chr n)
+          else
+            t
+        with
+          Failure _ ->
+            (* int_of_string cannot represent the number *)
+            t
+        | Not_found ->
+            (* Group 2 did not match, so it must be a named reference *)
+            begin try
+              let name = Str.matched_group r 3 s in
+              begin try
+                Hashtbl.find quick_etable name
+              with
+                Not_found -> t
+              end
+            with
+              Not_found -> assert false
+            end
+     )
+     s ;;
+
+ let encode_from_latin1 s =
+   (* Replaces every character of s that is matched by unsafe_re by the
+    * entity reference stored for it in rev_etable.
+    *)
+   let replace r _ =
+     let unsafe_char = (Str.matched_string r s).[0] in
+     rev_etable.(Char.code unsafe_char)
+   in
+   Str.global_substitute unsafe_re replace s ;;
+end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.4 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.3 2000/03/03 17:03:16 gerd
+ * Q encoding: CR and LF are quoted.
+ *
+ * Revision 1.2 2000/03/03 01:08:29 gerd
+ * Added Netencoding.Html functions.
+ *
+ * Revision 1.1 2000/03/02 01:14:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(**********************************************************************)
+(* Several encodings important for the net *)
+(**********************************************************************)
+
+
+(**********************************************************************)
+(* Base 64 encoding *)
+(**********************************************************************)
+
+(* See RFC 2045 for a description of Base 64 encoding. *)
+
+(* THREAD-SAFETY:
+ * All Base64 functions are reentrant and thus thread-safe.
+ *)
+
+module Base64 : sig
+
+ val encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+ string -> string
+ (* Compute the "base 64" encoding of the given string argument.
+ * Note that the result is a string that only contains the characters
+ * a-z, A-Z, 0-9, +, /, =, and - if lines are divided - CR and LF
+ * characters.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is encoded. Invalid_argument is raised if this substring does not
+ * lie within the string, or if linelength is negative.
+ *
+ * The result is divided up into lines not longer than 'linelength'
+ * (without counting the line separator); default: do not divide lines.
+ * If 'linelength' is smaller than 4, no line division is performed.
+ * If 'linelength' is not divisible by 4, the produced lines are a
+ * bit shorter than 'linelength'.
+ * If lines are divided, every line - including the last one - is
+ * followed by a line separator.
+ *
+ * If 'crlf' (default: false) the lines are ended by CRLF; otherwise
+ * they are only ended by LF.
+ * (You need the crlf option to produce correct MIME messages.)
+ *
+ *)
+
+ val url_encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool ->
+ string -> string
+ (* Same as 'encode' but use slightly different characters that can be
+ * part of URLs without additional encodings.
+ * The encoded string consists only of the characters a-z, A-Z, 0-9,
+ * -, /, . (and of CR and LF characters if lines are divided):
+ * The minus sign replaces the plus sign, and the dot replaces the
+ * equal sign as padding character.
+ * 'url_encode' does NOT implement the Base 64 encoding as described
+ * in the standard!
+ *)
+
+ val encode_substring : string -> pos:int -> len:int -> linelength:int ->
+ crlf:bool -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+ *
+ * encode_substring s pos len linelen crlf:
+ * Encodes the substring at position 'pos' in 's' with length 'len'.
+ * The result is divided up into lines not longer than 'linelen' (without
+ * counting the line separator).
+ * If 'linelen' is smaller than 4, no line division is performed.
+ * If 'linelen' is not divisible by 4, the produced lines are a
+ * bit shorter than 'linelen'.
+ * If 'crlf' the lines are ended by CRLF; otherwise they are only
+ * ended by LF.
+ * (You need the crlf option to produce correct MIME messages.)
+ *)
+
+ val decode : ?pos:int -> ?len:int -> ?url_variant:bool ->
+ ?accept_spaces:bool -> string -> string
+ (* Decodes the given string argument.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is decoded.
+ *
+ * If url_variant (default: true) is set, the function also
+ * accepts the characters '-' and '.' as produced by 'url_encode'.
+ *
+ * If accept_spaces (default: false) is set, the function ignores
+ * white space (spaces, TABs, CRs, LFs) contained in the string to
+ * decode (otherwise the function fails if it finds white space).
+ *
+ * Invalid_argument is raised if the substring does not lie within
+ * the string, or if the string to decode is not well-formed.
+ *)
+
+ val decode_ignore_spaces : string -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ *
+ * Decodes the whole string like 'decode', but the string may contain
+ * whitespace characters; the characters of the URL variant are
+ * accepted, too.
+ * This function is slower than 'decode'.
+ *)
+
+ val decode_substring : string -> pos:int -> len:int -> url_variant:bool ->
+ accept_spaces:bool -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ *
+ * decode_substring s pos len url spaces:
+ * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+ * If 'url', strings created by 'url_encode' are accepted, too.
+ * If 'spaces', whitespace characters are allowed in the string.
+ *)
+end
+
+(**********************************************************************)
+(* Quoted printable encoding *)
+(**********************************************************************)
+
+(* See RFC 2045.
+ * This implementation assumes that the encoded string has a text MIME
+ * type. Because of this, the characters CR and LF are never protected
+ * by hex tokens; they are copied literally to the output string.
+ *)
+
+(* THREAD-SAFETY:
+ * All QuotedPrintable functions are reentrant and thus thread-safe.
+ *)
+
+module QuotedPrintable :
+ sig
+ (* Overview: 'encode' and 'decode' process the whole string, or the
+ * substring selected by the optional arguments pos and len. The
+ * functions with the suffix _substring are deprecated.
+ *)
+
+ val encode : ?pos:int -> ?len:int -> string -> string
+ (* Encodes the string and returns it.
+ * Note line breaks:
+ * No additional soft line breaks are added. The characters CR
+ * and LF are not represented as =0D resp. =0A. (But other control
+ * characters ARE encoded.)
+ * Note unsafe characters:
+ * As recommended by RFC 2045, the characters !\"#$@[]^`{|}~
+ * are additionally represented as hex tokens. -- "
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is encoded.
+ *)
+
+ val encode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! ***
+ * encode_substring s pos len:
+ * Encodes the substring of 's' beginning at 'pos' with length 'len'.
+ * NOTE(review): presumably the same as encode ~pos ~len s; the
+ * implementation is not visible from this interface.
+ *)
+
+ val decode : ?pos:int -> ?len:int -> string -> string
+ (* Decodes the string and returns it.
+ * Most format errors cause an Invalid_argument exception.
+ * Note that soft line breaks can be properly decoded although
+ * 'encode' will never produce them.
+ *
+ * If pos and/or len are passed, only the substring starting at
+ * pos (default: 0) with length len (default: rest of the string)
+ * is decoded.
+ *)
+
+ val decode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! ***
+ * decode_substring s pos len:
+ * Decodes the substring of 's' beginning at 'pos' with length 'len'.
+ * NOTE(review): presumably the same as decode ~pos ~len s; the
+ * implementation is not visible from this interface.
+ *)
+
+ end
+
+(**********************************************************************)
+(* Q encoding *)
+(**********************************************************************)
+
+(* See RFC 2047.
+ * The functions behave similarly to those of QuotedPrintable.
+ *)
+
+(* THREAD-SAFETY:
+ * All Q functions are reentrant and thus thread-safe.
+ *)
+
+module Q :
+ sig
+ val encode : ?pos:int -> ?len:int -> string -> string
+ (* Encodes the string using the Q encoding and returns it.
+ * Note:
+ * All characters except alphanumeric characters are protected by
+ * hex tokens.
+ * In particular, spaces are represented as "=20", not as "_".
+ *
+ * NOTE(review): pos and len presumably select a substring in the
+ * same way as for QuotedPrintable.encode -- confirm.
+ *)
+
+ val decode : ?pos:int -> ?len:int -> string -> string
+ (* Decodes a Q-encoded string and returns it.
+ * NOTE(review): pos, len and the error behaviour presumably follow
+ * QuotedPrintable.decode; this interface does not say so
+ * explicitly -- confirm against the implementation.
+ *)
+
+ val encode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** *)
+
+ val decode_substring : string -> pos:int -> len:int -> string
+ (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** *)
+ end
+
+(**********************************************************************)
+(* B encoding *)
+(**********************************************************************)
+
+(* The B encoding of RFC 2047 is the same as Base64. *)
+
+
+(**********************************************************************)
+(* URL-encoding *)
+(**********************************************************************)
+
+(* Encoding/Decoding within URLs:
+ *
+ * The following two functions perform the '%'-substitution for
+ * characters that may otherwise be interpreted as metacharacters.
+ *
+ * According to: RFC 1738, RFC 1630
+ *)
+
+(* THREAD-SAFETY:
+ * The Url functions are thread-safe.
+ *)
+
+module Url :
+ sig
+ val decode : string -> string
+ (* Reverses the '%'-substitution and returns the decoded string.
+ * NOTE(review): the treatment of malformed escape sequences is not
+ * visible from this interface.
+ *)
+ val encode : string -> string
+ (* Applies the '%'-substitution to the characters of the string that
+ * may otherwise be interpreted as metacharacters, and returns the
+ * result.
+ *)
+ end
+
+
+(**********************************************************************)
+(* HTMLization *)
+(**********************************************************************)
+
+(* Encodes characters that need protection by converting them to
+ * entity references. E.g. "<" is converted to "&lt;".
+ * As the entities may be named, there is a dependency on the character
+ * set. Currently, there are only functions for the Latin 1 alphabet.
+ *)
+
+(* THREAD-SAFETY:
+ * The Html functions are thread-safe.
+ *)
+
+module Html :
+ sig
+ val encode_from_latin1 : string -> string
+ (* Encodes the characters 0-8, 11-12, 14-31, '<', '>', '"', '&',
+ * 127-255. If the characters have a name, a named entity is
+ * preferred over a numeric entity.
+ * Not in this list, and hence presumably left unchanged, are the
+ * codes 9, 10 and 13 as well as the codes 32-126 other than the
+ * four characters named above.
+ *)
+ val decode_to_latin1 : string -> string
+ (* Decodes the string. Unknown named entities are left as they
+ * are (i.e. decode_to_latin1 "&nonsense;" = "&nonsense;").
+ * The same applies to numeric entities greater than 255.
+ *)
+ end
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.3 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.2 2000/03/03 01:08:29 gerd
+ * Added Netencoding.Html functions.
+ *
+ * Revision 1.1 2000/03/02 01:14:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Nethtml_scanner;;
+
+(* Parse tree of an HTML document:
+ * Element (name, atts, subnodes) is an element with its attribute
+ * name/value pairs and its sub nodes; Data s is character data.
+ *)
+type document =
+ Element of (string * (string*string) list * document list)
+ | Data of string
+;;
+
+
+(* Raised by the parsing functions of this module when the scanner
+ * returns Eof. parse_document catches it and then closes all elements
+ * that are still open.
+ *)
+exception End_of_scan;;
+
+
+(* Names (in lowercase) of the HTML elements that are always empty and
+ * therefore never have an end tag. The list is stored in a reference so
+ * that it can be replaced at run time.
+ *)
+let no_end_tag =
+  ref [ "isindex"; "base"; "meta"; "link"; "hr"; "input";
+        "img"; "param"; "basefont"; "br"; "area" ]
+;;
+
+
+(* Names (in lowercase) of the elements whose contents are scanned with
+ * other lexical rules: everything up to the end tag is read as data.
+ * The list is stored in a reference so that it can be replaced at run
+ * time.
+ *)
+let special_tag =
+  ref [ "script"; "style" ]
+;;
+
+
+(* Skips the rest of a comment: tokens are read with scan_comment until
+ * a token other than Mcomment is returned.
+ * Raises End_of_scan if the input ends before that.
+ *)
+let parse_comment buf =
+  let rec skip () =
+    match scan_comment buf with
+    | Mcomment -> skip ()
+    | Eof -> raise End_of_scan
+    | _ -> ()
+  in
+  skip ()
+;;
+
+
+(* Skips the rest of a declaration: tokens are read with scan_doctype
+ * until a token other than Mdoctype is returned.
+ * Raises End_of_scan if the input ends before that.
+ *)
+let parse_doctype buf =
+  let rec skip () =
+    match scan_doctype buf with
+    | Mdoctype -> skip ()
+    | Eof -> raise End_of_scan
+    | _ -> ()
+  in
+  skip ()
+;;
+
+
+(* parse_document buf:
+ * Parses the HTML text delivered by the lexer buffer 'buf' and returns
+ * the top-level nodes of the document in document order.
+ *
+ * The parser is tolerant of malformed input:
+ * - an end tag without a matching open element is ignored;
+ * - an end tag also closes all elements opened after the element it
+ *   names;
+ * - at the end of the input all elements that are still open are closed.
+ *
+ * Elements listed in !no_end_tag are treated as empty. The contents of
+ * elements listed in !special_tag are returned as a single Data node.
+ *)
+let parse_document buf =
+  (* State of the innermost open element; its sub nodes are collected in
+   * reverse order. The enclosing open elements wait on 'stack'. The
+   * pseudo element with the empty name stands for the document itself.
+   *)
+  let current_name = ref "" in
+  let current_atts = ref [] in
+  let current_subs = ref [] in
+  let stack = Stack.create() in
+
+  let parse_atts() =
+    (* Reads the attributes of a start tag up to and including ">".
+     * Attribute names are converted to lowercase.
+     *)
+    let rec next_no_space() =
+      match scan_element buf with
+          Space _ -> next_no_space()
+        | t -> t
+    in
+
+    let rec parse_atts_lookahead next =
+      match next with
+          Relement -> []
+        | Name n ->
+            begin match next_no_space() with
+                Is ->
+                  begin match next_no_space() with
+                      Name v ->
+                        (* NOTE(review): values written without quotes
+                         * are converted to uppercase, which also hits
+                         * case-sensitive values such as URLs. Left
+                         * unchanged because callers may rely on it.
+                         *)
+                        (String.lowercase n, String.uppercase v) ::
+                          parse_atts_lookahead (next_no_space())
+                    | Literal v ->
+                        (String.lowercase n,v) ::
+                          parse_atts_lookahead (next_no_space())
+                    | Eof ->
+                        raise End_of_scan
+                    | Relement ->
+                        (* Illegal *)
+                        []
+                    | _ ->
+                        (* Illegal *)
+                        parse_atts_lookahead (next_no_space())
+                  end
+              | Eof ->
+                  raise End_of_scan
+              | Relement ->
+                  (* <tag name>   <==>   <tag name="name"> *)
+                  [ String.lowercase n, String.lowercase n ]
+              | next' ->
+                  (* assume <tag name ... >  <==>  <tag name="name" ...> *)
+                  ( String.lowercase n, String.lowercase n ) ::
+                    parse_atts_lookahead next'
+            end
+        | Eof ->
+            raise End_of_scan
+        | _ ->
+            (* Illegal *)
+            parse_atts_lookahead (next_no_space())
+    in
+    parse_atts_lookahead (next_no_space())
+  in
+
+  let parse_special name =
+    (* Parse until </name> and return the text read so far. 'name' is
+     * passed in lowercase, so the name found in the end tag is
+     * lowercased before the comparison; otherwise an end tag written as
+     * </SCRIPT> would not terminate the element.
+     * The text is collected in a buffer to avoid repeated string
+     * concatenation.
+     *)
+    let data = Buffer.create 256 in
+    let rec scan() =
+      match scan_special buf with
+          Lelementend n ->
+            if String.lowercase n = name then
+              Buffer.contents data
+            else begin
+              (* End tag of another element: part of the data *)
+              Buffer.add_string data "</";
+              Buffer.add_string data n;
+              scan()
+            end
+        | Eof ->
+            raise End_of_scan
+        | Cdata s ->
+            Buffer.add_string data s;
+            scan()
+        | _ ->
+            (* Illegal *)
+            scan()
+    in
+    scan()
+  in
+
+  let rec skip_element() =
+    (* Skip until ">" *)
+    match scan_element buf with
+        Relement ->
+          ()
+      | Eof ->
+          raise End_of_scan
+      | _ ->
+          skip_element()
+  in
+
+  let rec parse_next() =
+    let t = scan_document buf in
+    match t with
+        Lcomment ->
+          parse_comment buf;
+          parse_next()
+      | Ldoctype ->
+          parse_doctype buf;
+          parse_next()
+      | Lelement name ->
+          let name = String.lowercase name in
+          let atts = parse_atts() in
+          if List.mem name !no_end_tag then begin
+            (* Empty element: nothing to push *)
+            current_subs := (Element(name, atts, [])) :: !current_subs;
+            parse_next()
+          end
+          else if List.mem name !special_tag then begin
+            let data = parse_special name in
+            (* Read until ">" *)
+            skip_element();
+            current_subs := (Element(name, atts, [Data data])) :: !current_subs;
+            parse_next()
+          end
+          else begin
+            (* Ordinary element: it becomes the innermost open element *)
+            Stack.push (!current_name, !current_atts, !current_subs) stack;
+            current_name := name;
+            current_atts := atts;
+            current_subs := [];
+            parse_next()
+          end
+      | Cdata data ->
+          current_subs := (Data data) :: !current_subs;
+          parse_next()
+      | Lelementend name ->
+          let name = String.lowercase name in
+          (* Read until ">" *)
+          skip_element();
+          (* Search the element to close on the stack: *)
+          let found = ref (name = !current_name) in
+          Stack.iter
+            (fun (old_name, _, _) ->
+               if name = old_name then found := true)
+            stack;
+          (* If not found, the end tag is wrong. Simply ignore it. *)
+          if not !found then
+            parse_next()
+          else begin
+            (* Put the current element on to the stack: *)
+            Stack.push (!current_name, !current_atts, !current_subs) stack;
+            (* If found: Remove the elements from the stack, and append
+             * them to the previous element as sub elements
+             *)
+            let rec remove() =
+              let old_name, old_atts, old_subs = Stack.pop stack in
+              (* or raise Stack.Empty *)
+              if old_name = name then
+                old_name, old_atts, old_subs
+              else
+                let older_name, older_atts, older_subs = remove() in
+                older_name,
+                older_atts,
+                (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+            in
+            let old_name, old_atts, old_subs = remove() in
+            (* Remove one more element: the element containing the element
+             * currently being closed.
+             *)
+            let new_name, new_atts, new_subs = Stack.pop stack in
+            current_name := new_name;
+            current_atts := new_atts;
+            current_subs := (Element (old_name, old_atts, List.rev old_subs))
+                            :: new_subs;
+            (* Go on *)
+            parse_next()
+          end
+      | Eof ->
+          raise End_of_scan
+      | _ ->
+          parse_next()
+  in
+  try
+    (* Every branch of parse_next either recurses or raises, so the
+     * result is normally produced by the exception handler below.
+     *)
+    parse_next();
+    List.rev !current_subs
+  with
+      End_of_scan ->
+        (* Close all remaining elements: *)
+        Stack.push (!current_name, !current_atts, !current_subs) stack;
+        let rec remove() =
+          let old_name, old_atts, old_subs = Stack.pop stack in
+          (* or raise Stack.Empty *)
+          try
+            let older_name, older_atts, older_subs = remove() in
+            older_name,
+            older_atts,
+            (Element (old_name, old_atts, List.rev old_subs) :: older_subs)
+          with
+              Stack.Empty ->
+                (* Bottom of the stack: the document itself *)
+                old_name, old_atts, old_subs
+        in
+        let _, _, subs = remove() in
+        List.rev subs
+;;
+
+
+(* Parses the HTML document contained in the string 's'. *)
+let parse_string s =
+  parse_document (Lexing.from_string s)
+;;
+
+
+(* Parses the HTML document read from the input channel 'fd'. *)
+let parse_file fd =
+  parse_document (Lexing.from_channel fd)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* The type 'document' represents parsed HTML documents.
+ * Element (name, args, subnodes): is an element node for an element of
+ * type 'name' (i.e. written <name ...>...</name>) with arguments 'args'
+ * and subnodes 'subnodes' (the material within the element). The arguments
+ * are simply name/value pairs. Entity references (something like &xy;)
+ * occurring in the values are NOT resolved.
+ * Arguments without values (e.g. <select name="x" multiple>: here,
+ * "multiple" is such an argument) are represented as (name,name), i.e. the
+ * name is returned as value.
+ * As argument names are case-insensitive, the names are all lowercase.
+ * Element names are converted to lowercase, too. Argument values written
+ * in quotes are returned unchanged, whereas values written without quotes
+ * are converted to uppercase.
+ * Data s: is a character data node. Again, entity references are contained
+ * as such and not as what they mean.
+ *)
+
+type document =
+ Element of (string * (string*string) list * document list)
+ | Data of string
+;;
+
+
+val no_end_tag : string list ref;;
+ (* List of tags which are always empty. This variable is pre-configured,
+ * but you may want to change it. The names must be given in lowercase
+ * because element names are converted to lowercase before they are
+ * looked up.
+ * It is important to know which elements are always empty, because HTML
+ * allows the end tag to be omitted for them. For example,
+ * <a><b>x</a> is parsed as
+ * Element("a",[],[ Element("b",[],[]); Data "x" ])
+ * if we know that "b" is an empty element, but it is wrongly parsed as
+ * Element("a",[],[ Element("b",[], [ Data "x"]) ])
+ * if "b" is actually empty but we do not know it.
+ * An example of such a tag is "br".
+ *)
+
+val special_tag : string list ref;;
+ (* List of tags with a special rule for recognizing the end.
+ * This variable is pre-configured, but you may want to change it.
+ * The names must be given in lowercase because element names are
+ * converted to lowercase before they are looked up.
+ * The special rule is that the metacharacters '<', '>' and so on lose
+ * their meaning within the element, and that only the corresponding
+ * end tag stops this kind of scanning. An example is the element
+ * "script". Inner elements are not recognized, and the element
+ * can only be ended by </script>. (Other elements are also ended
+ * if an embracing element ends, e.g. "j" in <k><j></k>!)
+ *
+ * Note that comments are not recognized within special elements;
+ * comments are returned as character material.
+ *)
+
+val parse_string : string -> document list
+ (* Parses the HTML document from a string and returns it as the list
+ * of its top-level nodes. End tags that do not match an open element
+ * are ignored, and elements still open at the end of the string are
+ * closed implicitly.
+ *)
+
+val parse_file : in_channel -> document list
+ (* Parses the HTML document from a file (an input channel) and returns
+ * it as the list of its top-level nodes. End tags that do not match an
+ * open element are ignored, and elements still open at the end of the
+ * input are closed implicitly. The channel is not closed.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+ (* Tokens returned by the scanner rules of this file. *)
+ type token =
+ Lcomment (* start of a comment: "<!--" *)
+ | Rcomment (* end of a comment: "-->" *)
+ | Mcomment (* material within a comment *)
+ | Ldoctype (* start of a declaration: "<!" *)
+ | Rdoctype (* end of a declaration: ">" *)
+ | Mdoctype (* material within a declaration *)
+ | Lelement of string (* start of a start tag: "<" plus the name *)
+ | Lelementend of string (* start of an end tag: "</" plus the name *)
+ | Relement (* end of a tag: ">" *)
+ | Cdata of string (* character data *)
+ | Space of int (* white space within a tag; the length *)
+ | Name of string (* a name within a tag *)
+ | Is (* the equals sign within a tag *)
+ | Literal of string (* a quoted string without the quotes *)
+ | Other (* any other character within a tag *)
+ | Eof (* end of the input *)
+}
+
+(* Simplified rules: Only Latin-1 is recognized as character set *)
+
+(* Letters: the ASCII letters and the codes 192 to 255 with the
+ * exception of 215 and 247.
+ *)
+let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255']
+let extender = '\183'
+let digit = ['0'-'9']
+(* hexdigit and nmtoken are not used by any rule in this file. *)
+let hexdigit = ['0'-'9' 'A'-'F' 'a'-'f']
+let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
+(* A name begins with a letter, an underscore or a colon. *)
+let name = ( letter | '_' | ':' ) namechar*
+let nmtoken = namechar+
+let ws = [ ' ' '\t' '\r' '\n' ]
+(* Strings in double quotes (string_literal1) and in single quotes
+ * (string_literal2). The contents must not include the delimiting
+ * quote character, angle brackets, or a line feed.
+ *)
+let string_literal1 = '"' [^ '"' '>' '<' '\n']* '"'
+let string_literal2 = "'" [^ '\'' '>' '<' '\n']* "'"
+
+
+(* The following rules reflect HTML as it is used, not the SGML
+ * rules.
+ *)
+
+(* scan_document: the scanner for the material outside of tags.
+ * It recognizes the beginning of comments, declarations, start tags
+ * and end tags. Everything else up to the next less-than sign is
+ * returned as Cdata; a less-than sign that does not begin one of the
+ * constructs above is returned as Cdata, too.
+ *)
+rule scan_document = parse
+ | "<!--"
+ { Lcomment }
+ | "<!"
+ { Ldoctype }
+ | "<" name
+ { let s = Lexing.lexeme lexbuf in
+ Lelement (String.sub s 1 (String.length s - 1))
+ }
+ | "</" name
+ { let s = Lexing.lexeme lexbuf in
+ Lelementend (String.sub s 2 (String.length s - 2))
+ }
+ | "<" (* misplaced "<" *)
+ { Cdata "<" }
+ | eof
+ { Eof }
+ | [^ '<' ]+
+ { Cdata (Lexing.lexeme lexbuf)}
+
+(* scan_special: the scanner for the contents of special elements.
+ * Only the beginning of an end tag is recognized as markup; all other
+ * material, including single less-than signs, is returned as Cdata.
+ *)
+and scan_special = parse
+ | "</" name
+ { let s = Lexing.lexeme lexbuf in
+ Lelementend (String.sub s 2 (String.length s - 2))
+ }
+ | "<"
+ { Cdata "<" }
+ | eof
+ { Eof }
+ | [^ '<' ]+
+ { Cdata (Lexing.lexeme lexbuf)}
+
+
+(* scan_comment: the scanner for the inner part of comments.
+ * Returns Rcomment for the three characters that end a comment, and
+ * Mcomment for a single dash or a run of characters without a dash.
+ *)
+and scan_comment = parse
+ | "-->"
+ { Rcomment }
+ | "-"
+ { Mcomment }
+ | eof
+ { Eof }
+ | [^ '-']+
+ { Mcomment }
+
+(* scan_doctype: the scanner for the inner part of declarations.
+ * The first greater-than sign ends the declaration (Rdoctype); any
+ * run of other characters is returned as Mdoctype.
+ *)
+and scan_doctype = parse
+ | ">" (* Occurrences in strings, and [ ] brackets are ignored *)
+ { Rdoctype }
+ | eof
+ { Eof }
+ | [^ '>' ] +
+ { Mdoctype }
+
+(* scan_element: the scanner for the inner part of tags, i.e. for the
+ * attribute list up to the closing greater-than sign.
+ * Space carries the length of the white space, and Literal carries the
+ * string without the enclosing quotes. A character that matches no
+ * other case is returned as Other.
+ *)
+and scan_element = parse
+ | ">"
+ { Relement }
+ | ws+
+ { Space (String.length (Lexing.lexeme lexbuf)) }
+ | name
+ { Name (Lexing.lexeme lexbuf) }
+ | "="
+ { Is }
+ | string_literal1
+ { let s = Lexing.lexeme lexbuf in
+ Literal (String.sub s 1 (String.length s - 2))
+ }
+ | string_literal2
+ { let s = Lexing.lexeme lexbuf in
+ Literal (String.sub s 1 (String.length s - 2))
+ }
+ | eof
+ { Eof }
+ | _
+ { Other }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/03/03 01:07:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* A list of integer pairs with separate cases for the empty list and
+ * for the list with exactly one element.
+ *)
+type from_uni_list =
+ U_nil
+ | U_single of (int*int)
+ | U_list of (int*int) list
+;;
+
+(* Table of the conversion tables to Unicode; initially empty, with an
+ * initial size of 50.
+ *)
+let to_unicode = Hashtbl.create 50;;
+
+(* Table of the conversion tables from Unicode; initially empty, with an
+ * initial size of 50.
+ *)
+let from_unicode = Hashtbl.create 50;;
+
+(* The functions called by 'lock' and 'unlock'. Both do nothing until
+ * they are replaced by init_mt.
+ *)
+let f_lock = ref (fun () -> ());;
+let f_unlock = ref (fun () -> ());;
+
+(* lock () calls the function currently stored in f_lock. *)
+let lock () =
+  let acquire = !f_lock in
+  acquire ()
+;;
+
+(* unlock () calls the function currently stored in f_unlock. *)
+let unlock () =
+  let release = !f_unlock in
+  release ()
+;;
+
+(* init_mt new_f_lock new_f_unlock: installs the functions that are
+ * called by 'lock' and 'unlock' from now on.
+ *)
+let init_mt new_f_lock new_f_unlock =
+  begin
+    f_lock := new_f_lock;
+    f_unlock := new_f_unlock
+  end
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/28 23:17:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type from_uni_list =
+ U_nil
+ | U_single of (int*int)
+ | U_list of (int*int) list
+;;
+ (* A representation of (int*int) list that is optimized for the case that
+ * lists with 0 and 1 elements are the most frequent cases:
+ * U_nil stands for the empty list, U_single for a list with exactly
+ * one pair, and U_list for any other list.
+ * NOTE(review): whether U_list may also hold fewer than two pairs
+ * cannot be seen from this interface.
+ *)
+
+
+val to_unicode : (Netconversion.encoding,
+ int array Lazy.t) Hashtbl.t;;
+
+val from_unicode : (Netconversion.encoding,
+ from_uni_list array Lazy.t) Hashtbl.t;;
+ (* These hashtables are used internally by the parser to store
+ * the conversion tables from 8 bit encodings to Unicode and vice versa.
+ * It is normally not necessary to access these tables; the
+ * Netconversion module does it already for you.
+ *
+ * The tables are stored as lazy values. In multi-threaded applications
+ * they must only be forced while the lock obtained by 'lock' is held.
+ *
+ * Specification of the conversion tables:
+ *
+ * to_unicode: maps an 8 bit code to Unicode, i.e.
+ * let m = Lazy.force (Hashtbl.find to_unicode `Enc_isoXXX) in
+ * let unicode = m.(isocode)
+ * - This may be (-1) to indicate that the code point is not defined.
+ *
+ * from_unicode: maps Unicode to an 8 bit code, i.e.
+ * let m = Lazy.force (Hashtbl.find from_unicode `Enc_isoXXX) in
+ * let l = m.(unicode land 255)
+ * Now search in l the pair (unicode, isocode), and return isocode.
+ *
+ * Note: It is guaranteed that both arrays have always 256 elements.
+ *)
+
+val lock : unit -> unit
+ (* In multi-threaded applications: obtains a lock which is required to
+ * Lazy.force the values found in to_unicode and from_unicode.
+ * In single-threaded applications: a NO-OP
+ * (The NO-OP is the initial behaviour; it lasts until init_mt installs
+ * other functions.)
+ *)
+
+val unlock : unit -> unit
+ (* In multi-threaded applications: releases the lock which is required to
+ * Lazy.force the values found in to_unicode and from_unicode.
+ * In single-threaded applications: a NO-OP
+ * (The NO-OP is the initial behaviour; it lasts until init_mt installs
+ * other functions.)
+ *)
+
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+ (* Internally used; see netstring_mt.ml
+ * init_mt f_lock f_unlock: installs the function called by 'lock'
+ * (first argument) and the function called by 'unlock' (second
+ * argument).
+ *)
+
+
+(* ---------------------------------------- *)
+
+(* The following comment was written when the conversion module belonged
+ * to the PXP package (Polymorphic XML Parser).
+ *)
+
+(* HOW TO ADD A NEW 8 BIT CODE:
+ *
+ * It is relatively simple to add a new 8 bit code to the system. This
+ * means that the parser can read and write files with the new encoding;
+ * this does not mean that the parser can represent the XML tree internally
+ * by the new encoding.
+ *
+ * - Put a new unimap file into the "mappings" directory. The file format
+ * is simple; please look at the already existing files.
+ * The name of the file determines the internal name of the code:
+ * If the file is called <name>.unimap, the code will be called
+ * `Enc_<name>.
+ *
+ * - Extend the type "encoding" in pxp_types.mli and pxp_types.ml
+ *
+ * - Extend the two functions encoding_of_string and string_of_encoding
+ * in pxp_types.ml
+ *
+ * - Recompile the parser
+ *
+ * Every encoding consumes at least 3kB of memory, but this may be much more
+ * if the code points are dispersed on the Unicode code space.
+ *
+ * Perhaps the addition of new codes will become even simpler in future
+ * versions of PXP; but it is currently more important to support
+ * non-8-bit codes, too.
+ * Every contribution of new codes to PXP is welcome!
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:47:24 gerd
+ * New type for the conversion Unicode to 8bit.
+ * Conversion tables are now lazy. Thus also mutexes are required.
+ *
+ * Revision 1.1 2000/08/13 00:02:57 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_mappings.mli):
+ *
+ * Revision 1.1 2000/07/27 00:40:02 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* WARNING! This is a generated file! *)
+let iso88591_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let iso88591_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144
\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\164\144\160\001\000\165\001\000\165\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\168\144\160\001\000\169\001\000\169\144\160\001\000\170\001\000\170\144\160\001\000\171\001\000\171\144\160\001\000\172\001\000\172\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174\144\160\001\000\175\001\000\175\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177\144\160\001\000\178\001\000\178\144\160\001\000\179\001\000\179\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184\144\160\001\000\185\001\000\185\144\160\001\000\186\001\000\186\144\160\001\000\187\001\000\187\144\160\001\000\188\001\000\188\144\160\001\000\189\001\000\189\144\160\001\000\190\001\000\190\144\160\001\000\191
\001\000\191\144\160\001\000\192\001\000\192\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194\144\160\001\000\195\001\000\195\144\160\001\000\196\001\000\196\144\160\001\000\197\001\000\197\144\160\001\000\198\001\000\198\144\160\001\000\199\001\000\199\144\160\001\000\200\001\000\200\144\160\001\000\201\001\000\201\144\160\001\000\202\001\000\202\144\160\001\000\203\001\000\203\144\160\001\000\204\001\000\204\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206\144\160\001\000\207\001\000\207\144\160\001\000\208\001\000\208\144\160\001\000\209\001\000\209\144\160\001\000\210\001\000\210\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212\144\160\001\000\213\001\000\213\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\000\216\001\000\216\144\160\001\000\217\001\000\217\144\160\001\000\218\001\000\218\144\160\001\000\219\001\000\219\144\160\001\000\220\001\000\220\144\160\001\000\221\001\000\221\144\160\001\000\222\001\000\222\144\160\001\000\223\001\000\223\144\160\001\000\224\001\000\224\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226\144\160\001\000\227\001\000\227\144\160\001\000\228\001\000\228\144\160\001\000\229\001\000\229\144\160\001\000\230\001\000\230\144\160\001\000\231\001\000\231\144\160\001\000\232\001\000\232\144\160\001\000\233\001\000\233\144\160\001\000\234\001\000\234\144\160\001\000\235\001\000\235\144\160\001\000\236\001\000\236\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238\144\160\001\000\239\001\000\239\144\160\001\000\240\001\000\240\144\160\001\000\241\001\000\241\144\160\001\000\242\001\000\242\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244\144\160\001\000\245\001\000\245\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247\144\160\001\000\248\001\000\248\144\160\001\000\249\001\000\249\144\160\001\000\250\001\000\250\144\160\001\000\251\001\000\251\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253\144
\160\001\000\254\001\000\254\144\160\001\000\255\001\000\255" 0 : Netmappings.from_uni_list array);;
+ let iso885910_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\001\018\001\001\"\001\001*\001\001(\001\0016\001\000\167\001\001;\001\001\016\001\001`\001\001f\001\001}\001\000\173\001\001j\001\001J\001\000\176\001\001\005\001\001\019\001\001#\001\001+\001\001)\001\0017\001\000\183\001\001<\001\001\017\001\001a\001\001g\001\001~\001 \021\001\001k\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\000\207\001\000\208\001\001E\001\001L\001\000\211\001\000\212\001\000\213\001\000\214\001\001h\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\000\221\001\000\222\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\000\239\001\000\240\001\001F\001\001M\001\000\243\001\000\244\001\000\245\001\000\246\001\001i\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\000\253\001\000\254\001\0018" 0 : int 
array);;
+let iso885910_from_unicode = lazy (Marshal.from_string``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\163@\145\160\160cc\160\160\001\001#\001\000\179@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\164@\145\160\160kk\160\160\001\001+\001\000\180@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\166@\145\160\160ww\160\160\001\0017\001\000\182@\145\160\160xx\160\160\001\0018\001\000\255@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\168@\145\160\160||\160\160\001\001<\001\000\184@\144\160}}\144\160~~`\000`\160\160\001\001`\001\000\170@\145\160\160\000a\000a\160\160\001\001a\001\000\186@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\171@\145\160\160\000g\000g\160\160\001\001g\001\000\187@\145\160\160\000h\000h\160\160\001\001h\001\000\215@\145\160\160\000i\000i\160\160\001\001i\001\000\247@\145\160\160\000j\000j\160\160\001\001j\001\000\174@\145\160\160\000k\000k\160\160\001\001k\001\000\190@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\172@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let iso885913_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001 \025" 0 : int array);;
+let iso885913_from_unicode = lazy (Marshal.from_string``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let iso885914_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\030\002\001\030\003\001\000\163\001\001\n\001\001\011\001\030\n\001\000\167\001\030\128\001\000\169\001\030\130\001\030\011\001\030\242\001\000\173\001\000\174\001\001x\001\030\030\001\030\031\001\001 
\001\001!\001\030@\001\030A\001\000\182\001\030V\001\030\129\001\030W\001\030\131\001\030`\001\030\243\001\030\132\001\030\133\001\030a\001\000\192\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\001\001t\001\000\209\001\000\210\001\000\211\001\000\212\001\000\213\001\000\214\001\030j\001\000\216\001\000\217\001\000\218\001\000\219\001\000\220\001\000\221\001\001v\001\000\223\001\000\224\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\001\001u\001\000\241\001\000\242\001\000\243\001\000\244\001\000\245\001\000\246\001\030k\001\000\248\001\000\249\001\000\250\001\000\251\001\000\252\001\000\253\001\001w\001\000\255" 0 : int array);;
+let iso885914_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\222\000\000\000\000\000\000\006w\000\000\006w\008\000\004\000\000\144\160@@\144\160AA\145\160\160BB\160\160\001\030\002\001\000\161@\145\160\160CC\160\160\001\030\003\001\000\162@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\145\160\160JJ\160\160\001\001\n\001\000\164\160\160\001\030\n\001\000\166@\145\160\160KK\160\160\001\001\011\001\000\165\160\160\001\030\011\001\000\171@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\145\160\160^^\160\160\001\030\030\001\000\176@\145\160\160__\160\160\001\030\031\001\000\177@\145\160\160``\160\160\001\001 \001\000\178@\145\160\160aa\160\160\001\001!\001\000\179@\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\030@\001\000\180@\145\160\160\000A\000A\160\160\001\030A\001\000\181@\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\001\030V\001\000\183@\145\160\160\000W\000W\160\160\001\030W\001\000\185@\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\030`\001\000\187@\145\160\160\000a\000a\160\160\001\030a\001\000\191@\144\160\000b\000b\144\160\000c\0
00c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\030j\001\000\215@\145\160\160\000k\000k\160\160\001\030k\001\000\247@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\145\160\160\000t\000t\160\160\001\001t\001\000\208@\145\160\160\000u\000u\160\160\001\001u\001\000\240@\145\160\160\000v\000v\160\160\001\001v\001\000\222@\145\160\160\000w\000w\160\160\001\001w\001\000\254@\145\160\160\000x\000x\160\160\001\001x\001\000\175@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let iso885915_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\000\161\001\000\162\001\000\163\001 \172\001\000\165\001\001`\001\000\167\001\001a\001\000\169\001\000\170\001\000\171\001\000\172\001\000\173\001\000\174\001\000\175\001\000\176\001\000\177\001\000\178\001\000\179\001\001}\001\000\181\001\000\182\001\000\183\001\001~\001\000\185\001\000\186\001\000\187\001\001R\001\001S\001\001xint array);;
+let iso885915_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\157\000\000\000\000\000\000\006!\000\000\006!\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\188@\145\160\160\000S\000S\160\160\001\001S\001\000\189@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\166@\145\160\160\000a\000a\160\160\001\001a\001\000\168@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\190@\14
4\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\180@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let iso88592_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\002\216\001\001A\001\000\164\001\001=\001\001Z\001\000\167\001\000\168\001\001`\001\001^\001\001d\001\001y\001\000\173\001\001}\001\001{\001\000\176\001\001\005\001\002\219\001\001B\001\000\180\001\001>\001\001[\001\002\199\001\000\184\001\001a\001\001_\001\001e\001\001z\001\002\221\001\001~\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
+let iso88592_from_unicode = lazy (Marshal.from_string``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\165@\145\160\160~~`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\171@\145\160\160\000e\000e\160\160\001\001e\001\000\187@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\172@\145\160\160\000z\000z\160\160\001\001z\001\000\188@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let iso88593_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001&\001\002\216\001\000\163\001\000\164\000\255\001\001$\001\000\167\001\000\168\001\0010\001\001^\001\001\030\001\0014\001\000\173\000\255\001\001{\001\000\176\001\001'\001\000\178\001\000\179\001\000\180\001\000\181\001\001%\001\000\183\001\000\184\001\0011\001\001_\001\001\031\001\0015\001\000\189\000\255\001\001|\001\000\192\001\000\193\001\000\194\000\255\001\000\196\001\001\n\001\001\008\001\000\199\001\000\200\001\000\201\001\000\202\001\000\203\001\000\204\001\000\205\001\000\206\001\000\207\000\255\001\000\209\001\000\210\001\000\211\001\000\212\001\001 \001\000\214\001\000\215\001\001\028\001\000\217\001\000\218\001\000\219\001\000\220\001\001l\001\001\\\001\000\223\001\000\224\001\000\225\001\000\226\000\255\001\000\228\001\001\011\001\001\t\001\000\231\001\000\232\001\000\233\001\000\234\001\000\235\001\000\236\001\000\237\001\000\238\001\000\239\000\255\001\000\241\001\000\242\001\000\243\001\000\244\001\001!\001\000\246\001\000\247\001\001\029\001\000\249\001\000\250\001\000\251\001\000\252\001\001m\001\001]\001\002\217" 0 
: int array);;
+let iso88593_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\165\000\000\000\000\000\000\006J\000\000\006J\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\145\160\160HH\160\160\001\001\008\001\000\198@\145\160\160II\160\160\001\001\t\001\000\230@\145\160\160JJ\160\160\001\001\n\001\000\197@\145\160\160KK\160\160\001\001\011\001\000\229@\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\144\160WW\144\160XX\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001\001\028\001\000\216@\145\160\160]]\160\160\001\001\029\001\000\248@\145\160\160^^\160\160\001\001\030\001\000\171@\145\160\160__\160\160\001\001\031\001\000\187@\145\160\160``\160\160\001\001 \001\000\213@\145\160\160aa\160\160\001\001!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001\001$\001\000\166@\145\160\160ee\160\160\001\001%\001\000\182@\145\160\160ff\160\160\001\001&\001\000\161@\145\160\160gg\160\160\001\001'\001\000\177@\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\169@\145\160\160qq\160\160\001\0011\001\000\185@\144\160rr\144\160ss\145\160\160tt\160\160\001\0014\001\000\172@\145\160\160uu\160\160\001\0015\001\000\188@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\145\160\160\000\\\000\\\160\160\001\001\\\001\000\222@\145\160\16
0\000]\000]\160\160\001\001]\001\000\254@\145\160\160\000^\000^\160\160\001\001^\001\000\170@\145\160\160\000_\000_\160\160\001\001_\001\000\186@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001\001l\001\000\221@\145\160\160\000m\000m\160\160\001\001m\001\000\253@\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let iso88594_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\128\001\000\129\001\000\130\001\000\131\001\000\132\001\000\133\001\000\134\001\000\135\001\000\136\001\000\137\001\000\138\001\000\139\001\000\140\001\000\141\001\000\142\001\000\143\001\000\144\001\000\145\001\000\146\001\000\147\001\000\148\001\000\149\001\000\150\001\000\151\001\000\152\001\000\153\001\000\154\001\000\155\001\000\156\001\000\157\001\000\158\001\000\159\001\000\160\001\001\004\001\0018\001\001V\001\000\164\001\001(\001\001;\001\000\167\001\000\168\001\001`\001\001\018\001\001\"\001\001f\001\000\173\001\001}\001\000\175\001\000\176\001\001\005\001\002\219\001\001W\001\000\180\001\001)\001\001<\001\002\199\001\000\184\001\001a\001\001\019\001\001#\001\001g\001\001J\001\001~\001\001K\001\001\000\001\000\193\001\000\194\001\000\195\001\000\196\001\000\197\001\000\198\001\001.\001\001\012\001\000\201\001\001\024\001\000\203\001\001\022\001\000\205\001\000\206\001\001*\001\001\016\001\001E\001\001L\001\0016\001\000\212\001\000\213\001\000\214\001\000\215\001\000\216\001\001r\001\000\218\001\000\219\001\000\220\001\001h\001\001j\001\000\223\001\001\001\001\000\225\001\000\226\001\000\227\001\000\228\001\000\229\001\000\230\001\001/\001\001\013\001\000\233\001\001\025\001\000\235\001\001\023\001\000\237\001\000\238\001\001+\001\001\017\001\001F\001\001M\001\0017\001\000\244\001\000\245\001\000\246\001\000\247\001\000\248\001\001s\001\000\250\001\000\251\001\000\252\001\001i\001\001k\001\002\217" 0 : int array);;
+let iso88594_from_unicode = lazy (Marshal.from_string``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\171@\145\160\160cc\160\160\001\001#\001\000\187@\144\160dd\144\160ee\144\160ff\144\160gg\145\160\160hh\160\160\001\001(\001\000\165@\145\160\160ii\160\160\001\001)\001\000\181@\145\160\160jj\160\160\001\001*\001\000\207@\145\160\160kk\160\160\001\001+\001\000\239@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\199@\145\160\160oo\160\160\001\001/\001\000\231@\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\211@\145\160\160ww\160\160\001\0017\001\000\243@\145\160\160xx\160\160\001\0018\001\000\162@\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\166@\145\160\160||\160\160\001\001<\001\000\182@\144\160}}\144\160~~`\000`\160\160\001\001`\001\000\169@\145\160\160\000a\000a\160\160\001\001a\001\000\185@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001\001f\001\000\172@\145\160\160\000g\000g\160\160\001\001g\001\000\188@\145\160\160\000h\000h\160\160\001\001h\001\000\221@\145\160\160\000i\000i\160\160\001\001i\001\000\253@\145\160\160\000j\000j\160\160\001\001j\001\000\222@\145\160\160\000k\000k\160\160\001\001k\001\000\254@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\217@\145\160\160\000s\000s\160\160\001\001s\001\000\249@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\174@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let iso88595_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~t\001\004\n\001\004\011\001\004\012\001\000\173\001\004\014\001\004\015\001\004\016\001\004\017\001\004\018\001\004\019\001\004\020\001\004\021\001\004\022\001\004\023\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004 \001\004!\001\004\"\001\004#\001\004$\001\004%\001\004&\001\004'\001\004(\001\004)\001\004*\001\004+\001\004,\001\004-\001\004.\001\004/\001\0040\001\0041\001\0042\001\0043\001\0044\001\0045\001\0046\001\0047\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001!\022\001\004Q\001\004R\001\004S\001\004T\001\004U\001\004V\001\004W\001\004X\001\004Y\001\004Z\001\004[\001\004\\\001\000\167\001\004^\001\004_" 0 : int array);;
+let iso88595_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\154\000\000\000\000\000\000\007r\000\000\007r\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\161@\145\160\160BB\160\160\001\004\002\001\000\162@\145\160\160CC\160\160\001\004\003\001\000\163@\145\160\160DD\160\160\001\004\004\001\000\164@\145\160\160EE\160\160\001\004\005\001\000\165@\145\160\160FF\160\160\001\004\006\001\000\166@\145\160\160GG\160\160\001\004\007\001\000\167@\145\160\160HH\160\160\001\004\008\001\000\168@\145\160\160II\160\160\001\004\t\001\000\169@\145\160\160JJ\160\160\001\004\n\001\000\170@\145\160\160KK\160\160\001\004\011\001\000\171@\145\160\160LL\160\160\001\004\012\001\000\172@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\174@\145\160\160OO\160\160\001\004\015\001\000\175@\145\160\160PP\160\160\001\004\016\001\000\176@\145\160\160QQ\160\160\001\004\017\001\000\177@\145\160\160RR\160\160\001\004\018\001\000\178@\145\160\160SS\160\160\001\004\019\001\000\179@\145\160\160TT\160\160\001\004\020\001\000\180@\145\160\160UU\160\160\001\004\021\001\000\181@\145\160\160VV\160\160\001\004\022\001\000\182\160\160\001!\022\001\000\240@\145\160\160WW\160\160\001\004\023\001\000\183@\145\160\160XX\160\160\001\004\024\001\000\184@\145\160\160YY\160\160\001\004\025\001\000\185@\145\160\160ZZ\160\160\001\004\026\001\000\186@\145\160\160[[\160\160\001\004\027\001\000\187@\145\160\160\\\\\160\160\001\004\028\001\000\188@\145\160\160]]\160\160\001\004\029\001\000\189@\145\160\160^^\160\160\001\004\030\001\000\190@\145\160\160__\160\160\001\004\031\001\000\191@\145\160\160``\160\160\001\004 
\001\000\192@\145\160\160aa\160\160\001\004!\001\000\193@\145\160\160bb\160\160\001\004\"\001\000\194@\145\160\160cc\160\160\001\004#\001\000\195@\145\160\160dd\160\160\001\004$\001\000\196@\145\160\160ee\160\160\001\004%\001\000\197@\145\160\160ff\160\160\001\004&\001\000\198@\145\160\160gg\160\160\001\004'\001\000\199@\145\160\160hh\160\160\001\004(\001\000\200@\145\160\160ii\160\160\001\004)\001\000\201@\145\160\160jj\160\160\001\004*\001\000\202@\145\160\160kk\160\160\001\004+\001\000\203@\145\160\160ll\160\160\001\004,\001\000\204@\145\160\160mm\160\160\001\004-\001\000\205@\145\160\160nn\160\160\001\004.\001\000\206@\145\160\160oo\160\160\001\004/\001\000\207@\145\160\160pp\160\160\001\0040\001\000\208@\145\160\160qq\160\160\001\0041\001\000\209@\145\160\160rr\160\160\001\0042\001\000\210@\145\160\160ss\160\160\001\0043\001\000\211@\145\160\160tt\160\160\001\0044\001\000\212@\145\160\160uu\160\160\001\0045\001\000\213@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\215@\145\160\160xx\160\160\001\0048\001\000\216@\145\160\160yy\160\160\001\0049\001\000\217@\145\160\160zz\160\160\001\004:\001\000\218@\145\160\160{{\160\160\001\004;\001\000\219@\145\160\160||\160\160\001\004<\001\000\220@\145\160\160}}\160\160\001\004=\001\000\221@\145\160\160~~\160\160\001\004>\001\000\222@\145\160\160\127\127\160\160\001\004?\001\000\223@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\000\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\
000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001\004R\001\000\242@\145\160\160\000S\000S\160\160\001\004S\001\000\243@\145\160\160\000T\000T\160\160\001\004T\001\000\244@\145\160\160\000U\000U\160\160\001\004U\001\000\245@\145\160\160\000V\000V\160\160\001\004V\001\000\246@\145\160\160\000W\000W\160\160\001\004W\001\000\247@\145\160\160\000X\000X\160\160\001\004X\001\000\248@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\249@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\250@\145\160\160\000[\000[\160\160\001\004[\001\000\251@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\252@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\254@\145\160\160\000_\000_\160\160\001\004_\001\000\255@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\14
1\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@@@@\144\160\001\000\167\001\000\253@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88596_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let iso88596_from_unicode = lazy (Marshal.from_string``\145\160\160aa\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\215@\145\160\160xx\160\160\001\0068\001\000\216@\145\160\160yy\160\160\001\0069\001\000\217@\145\160\160zz\160\160\001\006:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\145\160\160\000A\000A\160\160\001\006A\001\000\225@\145\160\160\000B\000B\160\160\001\006B\001\000\226@\145\160\160\000C\000C\160\160\001\006C\001\000\227@\145\160\160\000D\000D\160\160\001\006D\001\000\228@\145\160\160\000E\000E\160\160\001\006E\001\000\229@\145\160\160\000F\000F\160\160\001\006F\001\000\230@\145\160\160\000G\000G\160\160\001\006G\001\000\231@\145\160\160\000H\000H\160\160\001\006H\001\000\232@\145\160\160\000I\000I\160\160\001\006I\001\000\233@\145\160\160\000J\000J\160\160\001\006J\001\000\234@\145\160\160\000K\000K\160\160\001\006K\001\000\235@\145\160\160\000L\000L\160\160\001\006L\001\000\236@\145\160\160\000M\000M\160\160\001\006M\001\000\237@\145\160\160\00
0N\000N\160\160\001\006N\001\000\238@\145\160\160\000O\000O\160\160\001\006O\001\000\239@\145\160\160\000P\000P\160\160\001\006P\001\000\240@\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\145\160\160\000R\000R\160\160\001\006R\001\000\242@\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001\000\128\001\000\128\144\160\001\000\129\001\000\129\144\160\001\000\130\001\000\130\144\160\001\000\131\001\000\131\144\160\001\000\132\001\000\132\144\160\001\000\133\001\000\133\144\160\001\000\134\001\000\134\144\160\001\000\135\001\000\135\144\160\001\000\136\001\000\136\144\160\001\000\137\001\000\137\144\160\001\000\138\001\000\138\144\160\001\000\139\001\000\139\144\160\001\000\140\001\000\140\144\160\001\000\141\001\000\141\144\160\001\000\142\001\000\142\144\160\001\000\143\001\000\143\144\160\001\000\144\001\000\144\144\160\001\000\145\001\000\145\144\160\001\000\146\001\000\146\144\160\001\000\147\001\000\147\144\160\001\000\148\001\000\148\144\160\001\000\149\001\000\149\144\160\001\000\150\001\000\150\144\160\001\000\151\001\000\151\144\160\001\000\152\001\000\152\144\160\001\000\153\001\000\153\144\160\001\000\154\001\000\154\144\160\001\000\155\001\000\155\144\160\001\000\156\001\000\156\144\160\
001\000\157\001\000\157\144\160\001\000\158\001\000\158\144\160\001\000\159\001\000\159\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@@@@@@@@\144\160\001\000\173\001\000\173@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let iso88597_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002?\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let iso88597_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\223\000\000\000\000\000\000\006\147\000\000\006\147\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\161@\145\160\160YY\160\160\001 \025\001\000\162@\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000
|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let iso88598_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002!\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let iso88598_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\149\000\000\000\000\000\000\005]\000\000\005]\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\144\160SS\144\160TT\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\223@\144\160XX\144\160YY\144\160ZZ\144\160[[\144\160\\\\\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|
\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let iso88599_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let iso88599_from_unicode = lazy (Marshal.from_string``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ Hashtbl.add Netmappings.to_unicode `Enc_iso88599 iso88599_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88599 iso88599_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88598 iso88598_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88598 iso88598_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88597 iso88597_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88597 iso88597_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88596 iso88596_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88596 iso88596_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88595 iso88595_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88595 iso88595_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88594 iso88594_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88594 iso88594_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88593 iso88593_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88593 iso88593_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88592 iso88592_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88592 iso88592_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885915 iso885915_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885915 iso885915_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885914 iso885914_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885914 iso885914_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885913 iso885913_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885913 iso885913_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso885910 iso885910_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso885910 iso885910_from_unicode;
+Hashtbl.add Netmappings.to_unicode `Enc_iso88591 iso88591_to_unicode;
+Hashtbl.add Netmappings.from_unicode `Enc_iso88591 iso88591_from_unicode;
+();;
--- /dev/null
+(* WARNING! This is a generated file! *)
+let cp037_to_unicode = lazy (Marshal.from_string`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\001\000\162n|hk\000|f\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223adji{\001\000\172mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp037_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001
\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000jd\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000ip\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\
001\000\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp1006_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\228\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~f\002\000\000\251h\002\000\000\254\153\002\000\000\254\155\002\000\000\254\157\002\000\000\254\159\002\000\000\251z\002\000\000\251||\002\000\000\254}" 0 : int array);;
+let cp1006_from_unicode = lazy (Marshal.from_string``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\145\160\160\000V\000V\160\160\002\000\000\251V\001\000\181@\144\160\000W\000W\145\160\160\000X\000X\160\160\002\000\000\251X\001\000\182@\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\002\000\000\251f\001\000\186@\144\160\000g\000g\145\160\160\000h\000h\160\160\002\000\000\251h\001\000\187@\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\145\160\160\000z\000z\160\160\002\000\000\251z\001\000\192@\144\160\000{\000{\145\160\160\000|\000|\160\160\002\000\000\251|\001\000\193\160\160\002\000\000\254|\001\000\254@\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\255@\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp1026_to_unicode = lazy (Marshal.from_string`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\000{\001\000\241\001\000\199n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\001\001\030\001\0010ji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\000[\001\000\209\001\001_le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\0011z\001\000\214\001\001^g}\001\000\220\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000}\000`\001\000\166\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\001\000\246\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\000]d\000@\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\001\000\231\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\000~\001\000\242\001\000\243\001\000\245\001\001\031\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\000\\\001\000\249\001\000\250\001\000\255\001\000\252\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212c\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219b\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp1026_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\151\000\000\000\000\000\000\006\025\000\000\006\025\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\145\160\160^^\160\160\001\001\030\000Z@\145\160\160__\160\160\001\001\031\001\000\208@\144\160`\000@\144\160a\000O\144\160b\001\000\252\144\160c\001\000\236\144\160d\001\000\173\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\145\160\160\001\0010\000[\160\160p\001\000\240@\145\160\160\001\0011\000y\160\160q\001\000\241@\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000oh\144\160\000\\\001\000\220\144\160\000]\001\000\172\145\160\160\000^\000_\160\160\001\001^\000|@\145\160\160\001\001_\000j\160\160\000_\000m@\144\160\000`\001\000\141\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\000H\144\160\000|\001\000\
187\144\160\000}\001\000\140\144\160\000~\001\000\204\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~d\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000J\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w@\144\160\001\000\209\000i\144\160\001\000\210\001\000\237\144\160\001\000\211\001\000\238\144\160\001\000\212\001\000\235\144\160\001\000\213\001\000\239\144\160\001\000\214\000{p\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000\252\001\000\224@@\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp424_to_unicode = lazy (Marshal.from_string`\001\005\208\001\005\209\001\005\210\001\005\211\001\005\212\001\005\213\001\005\214\001\005\215\001\005\216\001\000\162n|hk\000|f\001\005\217\001\005\218\001\005\219\001\005\220\001\005\221\001\005\222\001\005\223\001\005\224\001\005\225adji{\001\000\172mo\001\005\226\001\005\227\001\005\228\001\005\229\001\005\230\001\005\231\001\005\232\001\005\233\001\000\166le\000_~\127\000\255\001\005\234\000\255\000\255\001\000\160\000\255\000\255\000\255\001 \023\000`zc\000@g}b\000\255\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\000\255\000\255\000\255\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\000\255\000\255\000\255\001\000\184\000\255\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\000\255\000\255\000\255\000\255\000\255\001\000\174\000^\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\000[\000]\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\000\255\000\255\000\255\000\255\000\255\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\000\255\000\255\000\255\000\255\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\000\255\000\255\000\255\000\255\000\255pqrstuvwxy\001\000\179\000\255\000\255\000\255\000\255\001\000\159" 0 : int array);;
+let cp424_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\135\000\000\000\000\000\000\005K\000\000\005K\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\145\160\160Wf\160\160\001 \023\000x@\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000Z\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\001\000\186\144\160\000\\\001\000\224\144\160\000]\001\000\187\144\160\000^\001\000\176\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000
\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000O\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000t@\144\160\001\000\162\000J\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000jb\144\160\001\005\227\000c\144\160\001\005\228\000d\144\160\001\005\229\000e\144\160\001\005\230\000f\144\160\001\005\231\000g\144\160\001\005\232\000h\144\160\001\005\233\000i\144\160\001\005\234\000q@@@@@@@@@@@@\144\160\001\000\247\001\000\225@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp437_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp437_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\1
60\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp500_to_unicode = lazy (Marshal.from_string`\001\000\160\001\000\226\001\000\228\001\000\224\001\000\225\001\000\227\001\000\229\001\000\231\001\000\241\000[n|hkaf\001\000\233\001\000\234\001\000\235\001\000\232\001\000\237\001\000\238\001\000\239\001\000\236\001\000\223\000]dji{\000^mo\001\000\194\001\000\196\001\000\192\001\000\193\001\000\195\001\000\197\001\000\199\001\000\209\001\000\166le\000_~\127\001\000\248\001\000\201\001\000\202\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\000`zc\000@g}b\001\000\216\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\000\171\001\000\187\001\000\240\001\000\253\001\000\254\001\000\177\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\000\170\001\000\186\001\000\230\001\000\184\001\000\198\001\000\164\001\000\181\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\000\161\001\000\191\001\000\208\001\000\221\001\000\222\001\000\174\001\000\162\001\000\163\001\000\165\001\000\183\001\000\169\001\000\167\001\000\182\001\000\188\001\000\189\001\000\190\001\000\172\000|\001\000\175\001\000\168\001\000\180\001\000\215\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\000\244\001\000\246\001\000\242\001\000\243\001\000\245\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\185\001\000\251\001\000\252\001\000\249\001\000\250\001\000\255\000\\\001\000\247\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\212\001\000\214\001\000\210\001\000\211\001\000\213pqrstuvwxy\001\000\179\001\000\219\001\000\220\001\000\217\001\000\218\001\000\159" 0 : int array);;
+let cp500_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\133\000\000\000\000\000\000\006\001\000\000\006\001\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\144\160U}\144\160Vr\144\160Wf\144\160XX\144\160YY\144\160Z\127\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\133\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i
\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\001\000\187\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\144\160\001\000\133U\144\160\001\000\134F\144\160\001\000\135W\144\160\001\000\136h\144\160\001\000\137i\144\160\001\000\138j\144\160\001\000\139k\144\160\001\000\140l\144\160\001\000\141I\144\160\001\000\142J\144\160\001\000\143[\144\160\001\000\144p\144\160\001\000\145q\144\160\001\000\146Z\144\160\001\000\147s\144\160\001\000\148t\144\160\001\000\149u\144\160\001\000\150v\144\160\001\000\151H\144\160\001\000\152x\144\160\001\000\153y\144\160\001\000\154z\144\160\001\000\155{\144\160\001\000\156D\144\160\001\000\157T\144\160\001\000\158~\144\160\001\000\159\001\000\255\144\160\001\000\160\000A\144\160\001\000\161\001\000\170\144\160\001\000\162\001\000\176\144\160\001\000\163\001\000\177\144\160\001\000\164\001\000\159\144\160\001\000\165\001\000\178\144\160\001\000\166\000jd\144\160\001\000\193\000e\144\160\001\000\194\000b\144\160\001\000\195\000f\144\160\001\000\196\000c\144\160\001\000\197\000g\144\160\001\000\198\001\000\158\144\160\001\000\199\000h\144\160\001\000\200\000t\144\160\001\000\201\000q\144\160\001\000\202\000r\144\160\001\000\203\000s\144\160\001\000\204\000x\144\160\001\000\205\000u\144\160\001\000\206\000v\144\160\001\000\207\000w\144\160\001\000\208\001\000\172\144\160\001\000\209\000ip\144\160\001\000\249\001\000\221\144\160\001\000\250\001\000\222\144\160\001\000\251\001\000\219\144\160\001\000
\252\001\000\220\144\160\001\000\253\001\000\141\144\160\001\000\254\001\000\142\144\160\001\000\255\001\000\223" 0 : Netmappings.from_uni_list array);;
+ let cp737_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\201\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\134\001\003\136\001\003\137\001\003\138\001\003\140\001\003\142\001\003\143\001\000\177\001\"e\001\"d\001\003\170\001\003\171\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp737_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007'\000\000\000\000\000\000\006\216\000\000\006\216\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\14
4\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@\144\160\001\003\134\001\000\234@\145\160\160\001%\136\001\000\219\160\160\001\003\136\001\000\235@\144\160\001\003\137\001\000\236\144\160\001\003\138\001\000\237@\145\160\160\001%\140\001\000\221\160\160\001\003\140\001\000\238@@\144\160\001\003\142\001\000\239\144\160\001\003\143\001\000\240\144\160\001%\144\001\000\222\145\160\160\001\003\145\001\000\128\160\160\001%\145\001\000\176@\145\160\160\001\003\146\001\000\129\160\160\001%\146\001\000\177@\145\160\160\001\003\147\001\000\130\160\160\001%\147\001\000\178@\144\160\001\003\148\001\000\131\144\160\001\003\149\001\000\132\144\160\001\003\150\001\000\133\144\160\001\003\151\001\000\134\144\160\001\003\152\001\000\135\144\160\001\003\153\001\000\136\144\160\001\003\154\001\000\137\144\160\001\003\155\001\000\138\144\160\001\003\156\001\000\139\144\160\001\003\157\001\000\140\144\160\001\003\158\001\000\141\144\160\001\003\159\001\000\142\145\160\160\001\003\160\001\000\143\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\003\161\001\000\144@\144\160\001\003\163\001\000\145\144\160\001\003\164\001\000\146\144\160\001\003\165\001\000\147\144\160\001\003\166\001\000\148\144\160\001\003\167\001\000\149\144\160\001\003\168\001\000\150\144\160\001\003\169\001\000\151\144\160\001\003\170\001\000\244\144\160\001\003\171\001\000\245\144\160\001\003\172\001\000\225\144\160\001\003\173\001\000\226\144\160\001\003\174\001\000\227\144\160\001\003\175\001\000\229\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\152\160\160\001\000\177\001\000\241@\145\160\160\001\003\178\001\000\153\160\160\001\000\178\001\000\253@\144\160\001\003\179\001\000\154\144\160\001
\003\180\001\000\155\144\160\001\003\181\001\000\156\144\160\001\003\182\001\000\157\145\160\160\001\003\183\001\000\158\160\160\001\000\183\001\000\250@\144\160\001\003\184\001\000\159\144\160\001\003\185\001\000\160\144\160\001\003\186\001\000\161\144\160\001\003\187\001\000\162\144\160\001\003\188\001\000\163\144\160\001\003\189\001\000\164\144\160\001\003\190\001\000\165\144\160\001\003\191\001\000\166\144\160\001\003\192\001\000\167\144\160\001\003\193\001\000\168\144\160\001\003\194\001\000\170\144\160\001\003\195\001\000\169\144\160\001\003\196\001\000\171\144\160\001\003\197\001\000\172\144\160\001\003\198\001\000\173\144\160\001\003\199\001\000\174\144\160\001\003\200\001\000\175\144\160\001\003\201\001\000\224\144\160\001\003\202\001\000\228\144\160\001\003\203\001\000\232\144\160\001\003\204\001\000\230\144\160\001\003\205\001\000\231\144\160\001\003\206\001\000\233@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\246@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp775_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\001\006\001\000\252\001\000\233\001\001\001\001\000\228\001\001#\001\000\229\001\001\007\001\001B\001\001\019\001\001V\001\001W\001\001+\001\001y\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\001M\001\000\246\001\001\"\001\000\162\001\001Z\001\001[\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001\000\215\001\000\164\001\001\000\001\001*\001\000\243\001\001{\001\001|\001\001z\001 \029\001\000\166\001\000\169\001\000\174\001\000\172\001\000\189\001\000\188\001\001A\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\001\004\001\001\012\001\001\024\001\001\022\001%c\001%Q\001%W\001%]\001\001.\001\001`\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001r\001\001j\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\001}\001\001\005\001\001\013\001\001\025\001\001\023\001\001/\001\001a\001\001s\001\001k\001\001~\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\000\211\001\000\223\001\001L\001\001C\001\000\245\001\000\213\001\000\181\001\001D\001\0016\001\0017\001\001;\001\001<\001\001F\001\001\018\001\001E\001 \025\001\000\173\001\000\177\001 \028\001\000\190\001\000\182\001\000\167\001\000\247\001 \030\001\000\176\001\"\025\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp775_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007U\000\000\000\000\000\000\007\019\000\000\007\019\008\000\004\000\000\145\160\160@@\160\160\001\001\000\001\000\160\160\160\001%\000\001\000\196@\145\160\160AA\160\160\001\001\001\001\000\131@\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\145\160\160DD\160\160\001\001\004\001\000\181@\145\160\160EE\160\160\001\001\005\001\000\208@\145\160\160FF\160\160\001\001\006\001\000\128@\145\160\160GG\160\160\001\001\007\001\000\135@\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001\001\012\001\000\182\160\160\001%\012\001\000\218@\145\160\160MM\160\160\001\001\013\001\000\209@\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\145\160\160RR\160\160\001\001\018\001\000\237@\145\160\160SS\160\160\001\001\019\001\000\137@\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\145\160\160VV\160\160\001\001\022\001\000\184@\145\160\160WW\160\160\001\001\023\001\000\211@\145\160\160XX\160\160\001\001\024\001\000\183\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\001\025\001\000\210\160\160\001 \025\001\000\239\160\160\001\"\025\001\000\249@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001 \028\001\000\242@\145\160\160]]\160\160\001 \029\001\000\166@\145\160\160^^\160\160\001 
\030\001\000\247@\144\160__\144\160``\144\160aa\145\160\160bb\160\160\001\001\"\001\000\149@\145\160\160cc\160\160\001\001#\001\000\133@\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\161@\145\160\160kk\160\160\001\001+\001\000\140@\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\145\160\160nn\160\160\001\001.\001\000\189@\145\160\160oo\160\160\001\001/\001\000\212@\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\145\160\160vv\160\160\001\0016\001\000\232@\145\160\160ww\160\160\001\0017\001\000\233@\144\160xx\144\160yy\144\160zz\145\160\160{{\160\160\001\001;\001\000\234@\145\160\160||\160\160\001%<\001\000\197\160\160\001\001<\001\000\235@\144\160}}\144\160~~`\000`\160\160\001\001`\001\000\190\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001\001a\001\000\213@\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001\001j\001\000\199@\145\160\160\000k\000k\160\160\001\001k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\198@\145\160\160\000s\000s\160\160\001\001s\001\000\214@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\165@\145\160\160\000{\000{\160\160\001\001{\001\000\163@\145\160\160\000|\000|\160\160\001\001|\001\000\164@\145\160\160\000}\000}\160\160\001\001}\001\000\207@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let cp850_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%lint array);;
+let cp850_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\211\000\000\000\000\000\000\006i\000\000\006i\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\145\160\160qq\160\160\001\0011\001\000\213@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\16
0\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp852_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\001o\001\001\007\001\000\231\001\001B\001\000\235\001\001P\001\001Q\001\000\238\001\001y\001\000\196\001\001\006\001\000\201\001\0019\001\001:\001\000\244\001\000\246\001\001=\001\001>\001\001Z\001\001[\001\000\214\001\000\220\001\001d\001\001e\001\001A\001\000\215\001\001\013\001\000\225\001\000\237\001\000\243\001\000\250\001\001\004\001\001\005\001\001}\001\001~\001\001\024\001\001\025\001\000\172\001\001z\001\001\012\001\001_\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\000\193\001\000\194\001\001\026\001\001^\001%c\001%Q\001%W\001%]\001\001{\001\001|\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\001\002\001\001\003\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\001\017\001\001\016\001\001\014\001\000\203\001\001\015\001\001G\001\000\205\001\000\206\001\001\027\001%\024\001%\012\001%\136\001%\132\001\001b\001\001n\001%\128\001\000\211\001\000\223\001\000\212\001\001C\001\001D\001\001H\001\001`\001\001a\001\001T\001\000\218\001\001U\001\001p\001\000\253\001\000\221\001\001c\001\000\180\001\000\173\001\002\221\001\002\219\001\002\199\001\002\216\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\002\217\001\001q\001\001X\001\001Y\001%\160\001\000\160" 0 : int array);;
+let cp852_from_unicode = lazy (Marshal.from_string``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001\0019\001\000\145@\145\160\160zz\160\160\001\001:\001\000\146@\144\160{{\145\160\160||\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\001=\001\000\149@\145\160\160~~\160\160\001\001>\001\000\150@\144\160\127\127\144\160\000@\000@\145\160\160\000A\000A\160\160\001\001A\001\000\157@\145\160\160\000B\000B\160\160\001\001B\001\000\136@\145\160\160\000C\000C\160\160\001\001C\001\000\227@\145\160\160\000D\000D\160\160\001\001D\001\000\228@\144\160\000E\000E\144\160\000F\000F\145\160\160\000G\000G\160\160\001\001G\001\000\213@\145\160\160\000H\000H\160\160\001\001H\001\000\229@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001\001P\001\000\138\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\001Q\001\000\139\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\001T\001\000\232@\145\160\160\000U\000U\160\160\001\001U\001\000\234@\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\001X\001\000\252@\145\160\160\000Y\000Y\160\160\001\001Y\001\000\253@\145\160\160\000Z\000Z\160\160\001\001Z\001\000\151\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\001[\001\000\152@\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\184@\145\160\160\000_\000_\160\160\001\001_\001\000\173@\145\160\160\000`\000`\160\160\001%`\001\000\204\160\160\001\001`\001\000\230@\145\
160\160\000a\000a\160\160\001\001a\001\000\231@\145\160\160\000b\000b\160\160\001\001b\001\000\221@\145\160\160\000c\000c\160\160\001%c\001\000\185\160\160\001\001c\001\000\238@\145\160\160\000d\000d\160\160\001\001d\001\000\155@\145\160\160\000e\000e\160\160\001\001e\001\000\156@\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\222@\145\160\160\000o\000o\160\160\001\001o\001\000\133@\145\160\160\000p\000p\160\160\001\001p\001\000\235@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\141@\145\160\160\000z\000z\160\160\001\001z\001\000\171@\145\160\160\000{\000{\160\160\001\001{\001\000\189@\145\160\160\000|\000|\160\160\001\001|\001\000\190@\145\160\160\000}\000}\160\160\001\001}\001\000\166@\145\160\160\000~\000~\160\160\001\001~\001\000\167@\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\245\144\160\001\000\168\001\000\249@@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170\144\160\001\000\173\001\000\240@@\144\160\001\000\176\001\000\248@@@\144\160\001\000\180\001\000\239@@@\144\160\001\000\184\001\000\247@@\144\160\001\000\187\001\000\175@@@@@\144\160\001\000\193\001\000\181\144\160\001\000\194\001\000\182@\144\160\001\000\196\001\000\142@@\145\160\160\001\000\199\001\000\128\160\160\001\002\199\001\000\243@@\144\160\001\000\201\001\000\144@\1
44\160\001\000\203\001\000\211@\144\160\001\000\205\001\000\214\144\160\001\000\206\001\000\215@@@@\144\160\001\000\211\001\000\224\144\160\001\000\212\001\000\226@\144\160\001\000\214\001\000\153\144\160\001\000\215\001\000\158\144\160\001\002\216\001\000\244\144\160\001\002\217\001\000\250\144\160\001\000\218\001\000\233\144\160\001\002\219\001\000\242\144\160\001\000\220\001\000\154\145\160\160\001\000\221\001\000\237\160\160\001\002\221\001\000\241@@\144\160\001\000\223\001\000\225@\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132@@\144\160\001\000\231\001\000\135@\144\160\001\000\233\001\000\130@\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161\144\160\001\000\238\001\000\140@@@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246@@\144\160\001\000\250\001\000\163@\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\236@@" 0 : Netmappings.from_uni_list array);;
+ let cp855_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004R\001\004\002\001\004S\001\004\003\001\004Q\001\004\001\001\004T\001\004\004\001\004U\001\004\005\001\004V\001\004\006\001\004W\001\004\007\001\004X\001\004\008\001\004Y\001\004\t\001\004Z\001\004\n\001\004[\001\004\011\001\004\\\001\004\012\001\004^\001\004\014\001\004_\001\004\015\001\004N\001\004.\001\004J\001\004*\001\0040\001\004\016\001\0041\001\004\017\001\004F\001\004&\001\0044\001\004\020\001\0045\001\004\021\001\004D\001\004$\001\0043\001\004\019\001\000\171\001\000\187\001%\145\001%\146\001%\147\001%\002\001%$\001\004E\001\004%\001\0048\001\004\024\001%c\001%Q\001%W\001%]\001\0049\001\004\025\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\004:\001\004\026\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\001\004;\001\004\027\001\004<\001\004\028\001\004=\001\004\029\001\004>\001\004\030\001\004?\001%\024\001%\012\001%\136\001%\132\001\004\031\001\004O\001%\128\001\004/\001\004@\001\004 \001\004A\001\004!\001\004B\001\004\"\001\004C\001\004#\001\0046\001\004\022\001\0042\001\004\018\001\004L\001\004,\001!\022\001\000\173\001\004K\001\004+\001\0047\001\004\023\001\004H\001\004(\001\004M\001\004-\001\004I\001\004)\001\004G\001\004'\001\000\167\001%\160\001\000\160" 0 : int array);;
+let cp855_from_unicode = lazy (Marshal.from_stringt\001\000\145@\145\160\160JJ\160\160\001\004\n\001\000\147@\145\160\160KK\160\160\001\004\011\001\000\149@\145\160\160LL\160\160\001\004\012\001\000\151\160\160\001%\012\001\000\218@\144\160MM\145\160\160NN\160\160\001\004\014\001\000\153@\145\160\160OO\160\160\001\004\015\001\000\155@\145\160\160PP\160\160\001\004\016\001\000\161\160\160\001%\016\001\000\191@\145\160\160QQ\160\160\001\004\017\001\000\163@\145\160\160RR\160\160\001\004\018\001\000\236@\145\160\160SS\160\160\001\004\019\001\000\173@\145\160\160TT\160\160\001\004\020\001\000\167\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001\004\021\001\000\169@\145\160\160VV\160\160\001\004\022\001\000\234\160\160\001!\022\001\000\239@\145\160\160WW\160\160\001\004\023\001\000\244@\145\160\160XX\160\160\001\004\024\001\000\184\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\004\025\001\000\190@\145\160\160ZZ\160\160\001\004\026\001\000\199@\145\160\160[[\160\160\001\004\027\001\000\209@\145\160\160\\\\\160\160\001%\028\001\000\195\160\160\001\004\028\001\000\211@\145\160\160]]\160\160\001\004\029\001\000\213@\145\160\160^^\160\160\001\004\030\001\000\215@\145\160\160__\160\160\001\004\031\001\000\221@\145\160\160``\160\160\001\004 
\001\000\226@\145\160\160aa\160\160\001\004!\001\000\228@\145\160\160bb\160\160\001\004\"\001\000\230@\145\160\160cc\160\160\001\004#\001\000\232@\145\160\160dd\160\160\001\004$\001\000\171\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\182@\145\160\160ff\160\160\001\004&\001\000\165@\145\160\160gg\160\160\001\004'\001\000\252@\145\160\160hh\160\160\001\004(\001\000\246@\145\160\160ii\160\160\001\004)\001\000\250@\145\160\160jj\160\160\001\004*\001\000\159@\145\160\160kk\160\160\001\004+\001\000\242@\145\160\160ll\160\160\001%,\001\000\194\160\160\001\004,\001\000\238@\145\160\160mm\160\160\001\004-\001\000\248@\145\160\160nn\160\160\001\004.\001\000\157@\145\160\160oo\160\160\001\004/\001\000\224@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\162@\145\160\160rr\160\160\001\0042\001\000\235@\145\160\160ss\160\160\001\0043\001\000\172@\145\160\160tt\160\160\001\0044\001\000\166\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\168@\145\160\160vv\160\160\001\0046\001\000\233@\145\160\160ww\160\160\001\0047\001\000\243@\145\160\160xx\160\160\001\0048\001\000\183@\145\160\160yy\160\160\001\0049\001\000\189@\145\160\160zz\160\160\001\004:\001\000\198@\145\160\160{{\160\160\001\004;\001\000\208@\145\160\160||\160\160\001%<\001\000\197\160\160\001\004<\001\000\210@\145\160\160}}\160\160\001\004=\001\000\212@\145\160\160~~\160\160\001\004>\001\000\214@\145\160\160\127\127\160\160\001\004?\001\000\216@\145\160\160\000@\000@\160\160\001\004@\001\000\225@\145\160\160\000A\000A\160\160\001\004A\001\000\227@\145\160\160\000B\000B\160\160\001\004B\001\000\229@\145\160\160\000C\000C\160\160\001\004C\001\000\231@\145\160\160\000D\000D\160\160\001\004D\001\000\170@\145\160\160\000E\000E\160\160\001\004E\001\000\181@\145\160\160\000F\000F\160\160\001\004F\001\000\164@\145\160\160\000G\000G\160\160\001\004G\001\000\251@\145\160\160\000H\000H\160\160\001\004H\001\000\245@\145\160\160\000I\000I\160\160\001\004I\001\
000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\158@\145\160\160\000K\000K\160\160\001\004K\001\000\241@\145\160\160\000L\000L\160\160\001\004L\001\000\237@\145\160\160\000M\000M\160\160\001\004M\001\000\247@\145\160\160\000N\000N\160\160\001\004N\001\000\156@\145\160\160\000O\000O\160\160\001\004O\001\000\222@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001\004Q\001\000\132\160\160\001%Q\001\000\186@\145\160\160\000R\000R\160\160\001\004R\001\000\128@\145\160\160\000S\000S\160\160\001\004S\001\000\130@\145\160\160\000T\000T\160\160\001\004T\001\000\134\160\160\001%T\001\000\201@\145\160\160\000U\000U\160\160\001\004U\001\000\136@\145\160\160\000V\000V\160\160\001\004V\001\000\138@\145\160\160\000W\000W\160\160\001\004W\001\000\140\160\160\001%W\001\000\187@\145\160\160\000X\000X\160\160\001\004X\001\000\142@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\144@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\146\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001\004[\001\000\148@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\150@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\004^\001\000\152@\145\160\160\000_\000_\160\160\001\004_\001\000\154@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\0
01%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@@@@@@\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\207@@\144\160\001\000\167\001\000\253@@@\144\160\001\000\171\001\000\174@\144\160\001\000\173\001\000\240@@@@@@@@@@@@@\144\160\001\000\187\001\000\175@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp856_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\028\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\000\255\000\255\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001\000\164\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001%\024\001%\012\001%\136\001%\132\001\000\166\000\255\001%\128\000\255\000\255\000\255\000\255\000\255\000\255\001\000\181\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\175\001\000\180\001\000\173\001\000\177\001 \023\001\000\190\001\000\182\001\000\167\001\000\247\001\000\184\001\000\176\001\000\168\001\000\183\001\000\185\001\000\179\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp856_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\005\177\000\000\000\000\000\000\005\152\000\000\005\152\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\242@\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\00
1\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp857_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002B\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~c\001%Q\001%W\001%]\001\000\162\001\000\165\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\000\227\001\000\195\001%Z\001%T\001%i\001%f\001%`\001%P\001%lint array);;
+let cp857_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\199\000\000\000\000\000\000\006f\000\000\006f\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\144\160YY\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\001\030\001\000\166@\145\160\160__\160\160\001\001\031\001\000\167@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001\0010\001\000\152@\145\160\160qq\160\160\001\0011\001\000\141@\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]
\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001\001^\001\000\158@\145\160\160\000_\000_\160\160\001\001_\001\000\159@\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp860_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp860_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007-\000\000\000\000\000\000\006\224\000\000\006\224\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145
\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp861_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp861_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\1
60\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\145\160\160\000\127\000\127\160\160\001 \127\001\000\252@\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\145\160\160\001\001\146\001\000\159\160\160\001%\146\001\000\177@\145\160\160\001%\147\001\000\178\160\160\001\003\147\001\000\226@@@@@\144\160\001\003\152\001\000\233@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@\144\160\001\000\161\001\000\173@\145\160\160\001\000\163\001\000\156\160\160\001\003\163\001\000\228@@@\144\160\001\003\166\001\000\232\144\160\001 
\167\001\000\158@\144\160\001\003\169\001\000\234@\144\160\001\000\171\001\000\174\144\160\001\000\172\001\000\170@@@\144\160\001\000\176\001\000\248\145\160\160\001\003\177\001\000\224\160\160\001\000\177\001\000\241@\144\160\001\000\178\001\000\253@\144\160\001\003\180\001\000\235\145\160\160\001\000\181\001\000\230\160\160\001\003\181\001\000\238@@\144\160\001\000\183\001\000\250@@@\144\160\001\000\187\001\000\175\144\160\001\000\188\001\000\172\144\160\001\000\189\001\000\171@\144\160\001\000\191\001\000\168\144\160\001\003\192\001\000\227\144\160\001\000\193\001\000\164@\144\160\001\003\195\001\000\229\145\160\160\001\000\196\001\000\142\160\160\001\003\196\001\000\231@\144\160\001\000\197\001\000\143\145\160\160\001\000\198\001\000\146\160\160\001\003\198\001\000\237@\144\160\001\000\199\001\000\128@\144\160\001\000\201\001\000\144@@@\144\160\001\000\205\001\000\165@@\144\160\001\000\208\001\000\139@@\144\160\001\000\211\001\000\166@@\144\160\001\000\214\001\000\153@\144\160\001\000\216\001\000\157@\144\160\001\000\218\001\000\167@\144\160\001\000\220\001\000\154\144\160\001\000\221\001\000\151\144\160\001\000\222\001\000\141\144\160\001\000\223\001\000\225\144\160\001\000\224\001\000\133\144\160\001\000\225\001\000\160\144\160\001\000\226\001\000\131@\144\160\001\000\228\001\000\132\144\160\001\000\229\001\000\134\144\160\001\000\230\001\000\145\144\160\001\000\231\001\000\135\144\160\001\000\232\001\000\138\144\160\001\000\233\001\000\130\144\160\001\000\234\001\000\136\144\160\001\000\235\001\000\137@\144\160\001\000\237\001\000\161@@\144\160\001\000\240\001\000\140@@\144\160\001\000\243\001\000\162\144\160\001\000\244\001\000\147@\144\160\001\000\246\001\000\148\144\160\001\000\247\001\000\246\144\160\001\000\248\001\000\155@\144\160\001\000\250\001\000\163\144\160\001\000\251\001\000\150\144\160\001\000\252\001\000\129\144\160\001\000\253\001\000\152\144\160\001\000\254\001\000\149@" 0 : Netmappings.from_uni_list array);;
+ let cp862_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp862_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0074\000\000\000\000\000\000\006\233\000\000\006\233\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\1
60\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp863_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp863_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0077\000\000\000\000\000\000\006\237\000\000\006\237\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\145\160\160WW\160\160\001 \023\001\000\141@\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\00
1\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp864_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\209\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcd\001\006jfghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\176\001\000\183\001\"\025\001\"\026\001%\146\001%\000\001%\002\001%<\001%$\001%,\001%\028\001%4\001%\016\001%\012\001%\020\001%\024\001\003\178\001\"\030\001\003\198\001\000\177\001\000\189\001\000\188\001\"H\001\000\171\001\000\187\002\000\000\254\247\002\000\000\254\248\000\255\000\255\002\000\000\254\251\002\000\000\254\252\000\255\001\000\160\001\000\173\002\000\000\254\130\001\000\163\001\000\164\002\000\000\254\132\000\255\000\255\002\000\000\254\142\002\000\000\254\143\002\000\000\254\149\002\000\000\254\153\001\006\012\002\000\000\254\157\002\000\000\254\161\002\000\000\254\165\001\006`\001\006a\001\006b\001\006c\001\006d\001\006e\001\006f\001\006g\001\006h\001\006i}\001\006Q\002\000\000\254\229\002\000\000\254\233\002\000\000\254\236\002\000\000\254\240\002\000\000\254\242\002\000\000\254\208\002\000\000\254\213\002\000\000\254\245\002\000\000\254\246\002\000\000\254\221\002\000\000\254\217\002\000\000\254\241\001%\160\000\255" 0 : int array);;
+let cp864_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007i\000\000\000\000\000\000\006\136\000\000\006\136\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\133@\144\160AA\145\160\160BB\160\160\001%\002\001\000\134@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\141\160\160\001\006\012\001\000\172@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\140@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\142@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\143@\145\160\160YY\160\160\001\"\025\001\000\130@\145\160\160ZZ\160\160\001\"\026\001\000\131@\145\160\160[[\160\160\001\006\027\001\000\187@\145\160\160\\\\\160\160\001%\028\001\000\138@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\145@\145\160\160__\160\160\001\006\031\001\000\191@\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\136@@\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\137@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\139@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\135@\144\160}}\144\160~~\144\160\127\127\145\160\160\000@\000@\160\160\001\006@\001\000\224@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\150@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\006Q\001\000\241@\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\
000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\006`\001\000\176@\145\160\160\000a\000a\160\160\001\006a\001\000\177@\145\160\160\000b\000b\160\160\001\006b\001\000\178@\145\160\160\000c\000c\160\160\001\006c\001\000\179@\145\160\160\000d\000d\160\160\001\006d\001\000\180@\145\160\160\000e\000e\160\160\001\006e\001\000\181@\145\160\160\000f\000f\160\160\001\006f\001\000\182@\145\160\160\000g\000g\160\160\001\006g\001\000\183@\145\160\160\000h\000h\160\160\001\006h\001\000\184@\145\160\160\000i\000i\160\160\001\006i\001\000\185@\145\160\160\001\006je\160\160\000j\000j@\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\002\000\000\254}\001\000\240@\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp865_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\000\199\001\000\252\001\000\233\001\000\226\001\000\228\001\000\224\001\000\229\001\000\231\001\000\234\001\000\235\001\000\232\001\000\239\001\000\238\001\000\236\001\000\196\001\000\197\001\000\201\001\000\230\001\000\198\001\000\244\001\000\246\001\000\242\001\000\251\001\000\249\001\000\255\001\000\214\001\000\220\001\000\248\001\000\163\001\000\216\001 \167\001\001\146\001\000\225\001\000\237\001\000\243\001\000\250\001\000\241\001\000\209\001\000\170\001\000\186\001\000\191\001#\016\001\000\172\001\000\189\001\000\188\001\000\161\001\000\171\001\000\164\001%\145\001%\146\001%\147\001%\002\001%$\001%a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\003\177\001\000\223\001\003\147\001\003\192\001\003\163\001\003\195\001\000\181\001\003\196\001\003\166\001\003\152\001\003\169\001\003\180\001\"\030\001\003\198\001\003\181\001\")\001\"a\001\000\177\001\"e\001\"d\001# \001#!\001\000\247\001\"H\001\000\176\001\"\025\001\000\183\001\"\026\001 \127\001\000\178\001%\160\001\000\160" 0 : int array);;
+let cp865_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0071\000\000\000\000\000\000\006\229\000\000\006\229\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001#\016\001\000\169\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001\"\025\001\000\249@\145\160\160ZZ\160\160\001\"\026\001\000\251@\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\145\160\160^^\160\160\001\"\030\001\000\236@\144\160__\145\160\160``\160\160\001# \001\000\244@\145\160\160aa\160\160\001#!\001\000\245@\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\145\160\160ii\160\160\001\")\001\000\239@\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181\160\160\001\"a\001\000\240@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209\160\160\001\"d\001\000\243@\145\160\160\000e\000e\160\160\001%e\001\000\210\160\160\001\"e\001\000\242@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\1
60\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp866_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~a\001%b\001%V\001%U\001%c\001%Q\001%W\001%]\001%\\\001%[\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001%^\001%_\001%Z\001%T\001%i\001%f\001%`\001%P\001%l\001%g\001%h\001%d\001%e\001%Y\001%X\001%R\001%S\001%k\001%j\001%\024\001%\012\001%\136\001%\132\001%\140\001%\144\001%\128\001\004@\001\004A\001\004B\001\004C\001\004D\001\004E\001\004F\001\004G\001\004H\001\004I\001\004J\001\004K\001\004L\001\004M\001\004N\001\004O\001\004\001\001\004Q\001\004\004\001\004T\001\004\007\001\004W\001\004\014\001\004^\001\000\176\001\"\025\001\000\183\001\"\026\001!\022\001\000\164\001%\160\001\000\160" 0 : int array);;
+let cp866_from_unicode = lazy (Marshal.from_string``\160\160\001\004 \001\000\144@\145\160\160aa\160\160\001\004!\001\000\145@\145\160\160bb\160\160\001\004\"\001\000\146@\145\160\160cc\160\160\001\004#\001\000\147@\145\160\160dd\160\160\001\004$\001\000\148\160\160\001%$\001\000\180@\145\160\160ee\160\160\001\004%\001\000\149@\145\160\160ff\160\160\001\004&\001\000\150@\145\160\160gg\160\160\001\004'\001\000\151@\145\160\160hh\160\160\001\004(\001\000\152@\145\160\160ii\160\160\001\004)\001\000\153@\145\160\160jj\160\160\001\004*\001\000\154@\145\160\160kk\160\160\001\004+\001\000\155@\145\160\160ll\160\160\001\004,\001\000\156\160\160\001%,\001\000\194@\145\160\160mm\160\160\001\004-\001\000\157@\145\160\160nn\160\160\001\004.\001\000\158@\145\160\160oo\160\160\001\004/\001\000\159@\145\160\160pp\160\160\001\0040\001\000\160@\145\160\160qq\160\160\001\0041\001\000\161@\145\160\160rr\160\160\001\0042\001\000\162@\145\160\160ss\160\160\001\0043\001\000\163@\145\160\160tt\160\160\001\0044\001\000\164\160\160\001%4\001\000\193@\145\160\160uu\160\160\001\0045\001\000\165@\145\160\160vv\160\160\001\0046\001\000\166@\145\160\160ww\160\160\001\0047\001\000\167@\145\160\160xx\160\160\001\0048\001\000\168@\145\160\160yy\160\160\001\0049\001\000\169@\145\160\160zz\160\160\001\004:\001\000\170@\145\160\160{{\160\160\001\004;\001\000\171@\145\160\160||\160\160\001\004<\001\000\172\160\160\001%<\001\000\197@\145\160\160}}\160\160\001\004=\001\000\173@\145\160\160~~\160\160\001\004>\001\000\174@\145\160\160\127\127\160\160\001\004?\001\000\175@\145\160\160\000@\000@\160\160\001\004@\001\000\224@\145\160\160\000A\000A\160\160\001\004A\001\000\225@\145\160\160\000B\000B\160\160\001\004B\001\000\226@\145\160\160\000C\000C\160\160\001\004C\001\000\227@\145\160\160\000D\000D\160\160\001\004D\001\000\228@\145\160\160\000E\000E\160\160\001\004E\001\000\229@\145\160\160\000F\000F\160\160\001\004F\001\000\230@\145\160\160\000G\000G\160\160\001\004G\001\000\231@\145\160\160\000H\000H\160
\160\001\004H\001\000\232@\145\160\160\000I\000I\160\160\001\004I\001\000\233@\145\160\160\000J\000J\160\160\001\004J\001\000\234@\145\160\160\000K\000K\160\160\001\004K\001\000\235@\145\160\160\000L\000L\160\160\001\004L\001\000\236@\145\160\160\000M\000M\160\160\001\004M\001\000\237@\145\160\160\000N\000N\160\160\001\004N\001\000\238@\145\160\160\000O\000O\160\160\001\004O\001\000\239@\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186\160\160\001\004Q\001\000\241@\145\160\160\000R\000R\160\160\001%R\001\000\213@\145\160\160\000S\000S\160\160\001%S\001\000\214@\145\160\160\000T\000T\160\160\001%T\001\000\201\160\160\001\004T\001\000\243@\145\160\160\000U\000U\160\160\001%U\001\000\184@\145\160\160\000V\000V\160\160\001%V\001\000\183@\145\160\160\000W\000W\160\160\001%W\001\000\187\160\160\001\004W\001\000\245@\145\160\160\000X\000X\160\160\001%X\001\000\212@\145\160\160\000Y\000Y\160\160\001%Y\001\000\211@\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\145\160\160\000[\000[\160\160\001%[\001\000\190@\145\160\160\000\\\000\\\160\160\001%\\\001\000\189@\145\160\160\000]\000]\160\160\001%]\001\000\188@\145\160\160\000^\000^\160\160\001%^\001\000\198\160\160\001\004^\001\000\247@\145\160\160\000_\000_\160\160\001%_\001\000\199@\145\160\160\000`\000`\160\160\001%`\001\000\204@\145\160\160\000a\000a\160\160\001%a\001\000\181@\145\160\160\000b\000b\160\160\001%b\001\000\182@\145\160\160\000c\000c\160\160\001%c\001\000\185@\145\160\160\000d\000d\160\160\001%d\001\000\209@\145\160\160\000e\000e\160\160\001%e\001\000\210@\145\160\160\000f\000f\160\160\001%f\001\000\203@\145\160\160\000g\000g\160\160\001%g\001\000\207@\145\160\160\000h\000h\160\160\001%h\001\000\208@\145\160\160\000i\000i\160\160\001%i\001\000\202@\145\160\160\000j\000j\160\160\001%j\001\000\216@\145\160\160\000k\000k\160\160\001%k\001\000\215@\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\00
0p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\223@@@\144\160\001%\132\001\000\220@@@\144\160\001%\136\001\000\219@@@\144\160\001%\140\001\000\221@@@\144\160\001%\144\001\000\222\144\160\001%\145\001\000\176\144\160\001%\146\001\000\177\144\160\001%\147\001\000\178@@@@@@@@@@@@\145\160\160\001%\160\001\000\254\160\160\001\000\160\001\000\255@@@@\144\160\001\000\164\001\000\253@@@@@@@@@@@\144\160\001\000\176\001\000\248@@@@@@\144\160\001\000\183\001\000\250@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp869_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~c\001%Q\001%W\001%]\001\003\158\001\003\159\001%\016\001%\020\001%4\001%,\001%\028\001%\000\001%<\001\003\160\001\003\161\001%Z\001%T\001%i\001%f\001%`\001%P\001%lint array);;
+let cp869_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\207\000\000\000\000\000\000\006\138\000\000\006\138\008\000\004\000\000\145\160\160@@\160\160\001%\000\001\000\196@\144\160AA\145\160\160BB\160\160\001%\002\001\000\179@\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\145\160\160LL\160\160\001%\012\001\000\218@\144\160MM\144\160NN\144\160OO\145\160\160PP\160\160\001%\016\001\000\191@\144\160QQ\144\160RR\144\160SS\145\160\160TT\160\160\001%\020\001\000\192@\145\160\160UU\160\160\001 \021\001\000\142@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\139\160\160\001%\024\001\000\217@\145\160\160YY\160\160\001 \025\001\000\140@\144\160ZZ\144\160[[\145\160\160\\\\\160\160\001%\028\001\000\195@\144\160]]\144\160^^\144\160__\144\160``\144\160aa\144\160bb\144\160cc\145\160\160dd\160\160\001%$\001\000\180@\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\145\160\160ll\160\160\001%,\001\000\194@\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\145\160\160tt\160\160\001%4\001\000\193@\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001%<\001\000\197@\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\145\160\160\000P\000P\160\160\001%P\001\000\205@\145\160\160\000Q\000Q\160\160\001%Q\001\000\186@\144\160\000R\000R\144\160\000S\000S\145\160\160\000T\000T\160\160\001%T\001\000\201@\144\160\000U\000U\144\160\000V\000V\145\160\160\000W\000W\160\160\001%W\001\000\187@\144\160\000X\000X\144\160\000Y\000Y\145\160\160\000Z\000Z\160\160\001%Z\001\000\200@\144\160\000[\000[\144\160\000\\\000\\\145\160\160\000]\000]\160\160\001%]\001\000\188@\144\160\000
^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001%`\001\000\204@\144\160\000a\000a\144\160\000b\000b\145\160\160\000c\000c\160\160\001%c\001\000\185@\144\160\000d\000d\144\160\000e\000e\145\160\160\000f\000f\160\160\001%f\001\000\203@\144\160\000g\000g\144\160\000h\000h\145\160\160\000i\000i\160\160\001%i\001\000\202@\144\160\000j\000j\144\160\000k\000k\145\160\160\000l\000l\160\160\001%l\001\000\206@\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let cp874_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002&\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\000\255\000\255\000\255\001 &\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\160\001\014\001\001\014\002\001\014\003\001\014\004\001\014\005\001\014\006\001\014\007\001\014\008\001\014\t\001\014\nint array);;
+let cp874_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\185\000\000\000\000\000\000\006\202\000\000\006\202\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\014\001\001\000\161@\145\160\160BB\160\160\001\014\002\001\000\162@\145\160\160CC\160\160\001\014\003\001\000\163@\145\160\160DD\160\160\001\014\004\001\000\164@\145\160\160EE\160\160\001\014\005\001\000\165@\145\160\160FF\160\160\001\014\006\001\000\166@\145\160\160GG\160\160\001\014\007\001\000\167@\145\160\160HH\160\160\001\014\008\001\000\168@\145\160\160II\160\160\001\014\t\001\000\169@\145\160\160JJ\160\160\001\014\n``\160\160\001\014 \001\000\192@\145\160\160aa\160\160\001\014!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001\014\"\001\000\194@\145\160\160cc\160\160\001\014#\001\000\195@\145\160\160dd\160\160\001\014$\001\000\196@\145\160\160ee\160\160\001\014%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\014&\001\000\198@\145\160\160gg\160\160\001\014'\001\000\199@\145\160\160hh\160\160\001\014(\001\000\200@\145\160\160ii\160\160\001\014)\001\000\201@\145\160\160jj\160\160\001\014*\001\000\202@\145\160\160kk\160\160\001\014+\001\000\203@\145\160\160ll\160\160\001\014,\001\000\204@\145\160\160mm\160\160\001\014-\001\000\205@\145\160\160nn\160\160\001\014.\001\000\206@\145\160\160oo\160\160\001\014/\001\000\207@\145\160\160pp\160\160\001\0140\001\000\208@\145\160\160qq\160\160\001\0141\001\000\209@\145\160\160rr\160\160\001\0142\001\000\210@\145\160\160ss\160\160\001\0143\001\000\211@\145\160\160tt\160\160\001\0144\001\000\212@\145\160\160uu\160\160\001\0145\001\000\213@\145\160\160vv\160\160\001\0146\001\000\214@\145\160\160ww\160\160\001\0147\001\000\215@\145\160\160xx\160\160\001\0148\001\000\216@\145\160\160yy\160\160\001\0149\001\000\217@\145\160\160zz\160\160\001\014:\001\000\218@\144\160{{\144\160||\144\160}}\144\160~~`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f
\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@@@@@@@@@\144\160\001 \172\001\000\128@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let cp875_to_unicode = lazy (Marshal.from_string`\001\003\145\001\003\146\001\003\147\001\003\148\001\003\149\001\003\150\001\003\151\001\003\152\001\003\153\000[n|hkaf\001\003\154\001\003\155\001\003\156\001\003\157\001\003\158\001\003\159\001\003\160\001\003\161\001\003\163\000]dji{\000^mo\001\003\164\001\003\165\001\003\166\001\003\167\001\003\168\001\003\169\001\003\170\001\003\171\000|le\000_~\127\001\000\168\001\003\134\001\003\136\001\003\137\001\000\160\001\003\138\001\003\140\001\003\142\001\003\143\000`zc\000@g}b\001\003\133\000a\000b\000c\000d\000e\000f\000g\000h\000i\001\003\177\001\003\178\001\003\179\001\003\180\001\003\181\001\003\182\001\000\176\000j\000k\000l\000m\000n\000o\000p\000q\000r\001\003\183\001\003\184\001\003\185\001\003\186\001\003\187\001\003\188\001\000\180\000~\000s\000t\000u\000v\000w\000x\000y\000z\001\003\189\001\003\190\001\003\191\001\003\192\001\003\193\001\003\195\001\000\163\001\003\172\001\003\173\001\003\174\001\003\202\001\003\175\001\003\204\001\003\205\001\003\203\001\003\206\001\003\194\001\003\196\001\003\197\001\003\198\001\003\199\001\003\200\000{\000A\000B\000C\000D\000E\000F\000G\000H\000I\001\000\173\001\003\201\001\003\144\001\003\176\001 \024\001 \021\000}\000J\000K\000L\000M\000N\000O\000P\000Q\000R\001\000\177\001\000\189\000\255\001\003\135\001 \025\001\000\166\000\\\000\255\000S\000T\000U\000V\000W\000X\000Y\000Z\001\000\178\001\000\167\000\255\000\255\001\000\171\001\000\172pqrstuvwxy\001\000\179\001\000\169\000\255\000\255\001\000\187\001\000\159" 0 : int array);;
+let cp875_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\220\000\000\000\000\000\000\006\142\000\000\006\142\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160Dw\144\160Em\144\160Fn\144\160Go\144\160HV\144\160IE\144\160Je\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\144\160SS\144\160T|\145\160\160U}\160\160\001 \021\001\000\207@\144\160Vr\144\160Wf\145\160\160XX\160\160\001 \024\001\000\206@\145\160\160YY\160\160\001 \025\001\000\222@@\144\160[g\144\160\\\\\144\160]]\144\160^^\144\160__\144\160`\000@\144\160a\000O\144\160b\000\127\144\160c\000{\144\160d\000[\144\160e\000l\144\160f\000P\144\160g\000}\144\160h\000M\144\160i\000]\144\160j\000\\\144\160k\000N\144\160l\000k\144\160m\000`\144\160n\000K\144\160o\000a\144\160p\001\000\240\144\160q\001\000\241\144\160r\001\000\242\144\160s\001\000\243\144\160t\001\000\244\144\160u\001\000\245\144\160v\001\000\246\144\160w\001\000\247\144\160x\001\000\248\144\160y\001\000\249\144\160z\000z\144\160{\000^\144\160|\000L\144\160}\000~\144\160~\000n\144\160\127\000o\144\160\000@\000|\144\160\000A\001\000\193\144\160\000B\001\000\194\144\160\000C\001\000\195\144\160\000D\001\000\196\144\160\000E\001\000\197\144\160\000F\001\000\198\144\160\000G\001\000\199\144\160\000H\001\000\200\144\160\000I\001\000\201\144\160\000J\001\000\209\144\160\000K\001\000\210\144\160\000L\001\000\211\144\160\000M\001\000\212\144\160\000N\001\000\213\144\160\000O\001\000\214\144\160\000P\001\000\215\144\160\000Q\001\000\216\144\160\000R\001\000\217\144\160\000S\001\000\226\144\160\000T\001\000\227\144\160\000U\001\000\228\144\160\000V\001\000\229\144\160\000W\001\000\230\144\160\000X\001\000\231\144\160\000Y\001\000\232\144\160\000Z\001\000\233\144\160\000[\000J\144\160\000\\\001\000\224\144\160\000]\000Z\144\160\000^\000_\144\160\000_\000m\144\160\000`\000y\144\160\000a\001\000\129\144\160\000b\001\000\130\144\160\000c\001\000\131\144\160\000d\001\000\132\144\160\000e\001\000\1
33\144\160\000f\001\000\134\144\160\000g\001\000\135\144\160\000h\001\000\136\144\160\000i\001\000\137\144\160\000j\001\000\145\144\160\000k\001\000\146\144\160\000l\001\000\147\144\160\000m\001\000\148\144\160\000n\001\000\149\144\160\000o\001\000\150\144\160\000p\001\000\151\144\160\000q\001\000\152\144\160\000r\001\000\153\144\160\000s\001\000\162\144\160\000t\001\000\163\144\160\000u\001\000\164\144\160\000v\001\000\165\144\160\000w\001\000\166\144\160\000x\001\000\167\144\160\000y\001\000\168\144\160\000z\001\000\169\144\160\000{\001\000\192\144\160\000|\000j\144\160\000}\001\000\208\144\160\000~\001\000\161\144\160\000\127G\144\160\001\000\128`\144\160\001\000\129a\144\160\001\000\130b\144\160\001\000\131c\144\160\001\000\132d\145\160\160\001\000\133U\160\160\001\003\133\001\000\128@\145\160\160\001\000\134F\160\160\001\003\134\000q@\145\160\160\001\000\135W\160\160\001\003\135\001\000\221@\145\160\160\001\000\136h\160\160\001\003\136\000r@\145\160\160\001\000\137i\160\160\001\003\137\000s@\145\160\160\001\000\138j\160\160\001\003\138\000u@\144\160\001\000\139k\145\160\160\001\000\140l\160\160\001\003\140\000v@\144\160\001\000\141I\145\160\160\001\000\142J\160\160\001\003\142\000w@\145\160\160\001\000\143[\160\160\001\003\143\000x@\145\160\160\001\000\144p\160\160\001\003\144\001\000\204@\145\160\160\001\000\145q\160\160\001\003\145\000A@\145\160\160\001\000\146Z\160\160\001\003\146\000B@\145\160\160\001\000\147s\160\160\001\003\147\000C@\145\160\160\001\000\148t\160\160\001\003\148\000D@\145\160\160\001\000\149u\160\160\001\003\149\000E@\145\160\160\001\000\150v\160\160\001\003\150\000F@\145\160\160\001\000\151H\160\160\001\003\151\000G@\145\160\160\001\000\152x\160\160\001\003\152\000H@\145\160\160\001\000\153y\160\160\001\003\153\000I@\145\160\160\001\000\154z\160\160\001\003\154\000Q@\145\160\160\001\000\155{\160\160\001\003\155\000R@\145\160\160\001\000\156D\160\160\001\003\156\000S@\145\160\160\001\000\157T\160\160\001\003\157\000T@\145\160\160\001\000\1
58~\160\160\001\003\158\000U@\145\160\160\001\003\159\000V\160\160\001\000\159\001\000\255@\145\160\160\001\003\160\000W\160\160\001\000\160\000t@\144\160\001\003\161\000X@\145\160\160\001\003\163\000Y\160\160\001\000\163\001\000\176@\144\160\001\003\164\000b\144\160\001\003\165\000c\145\160\160\001\003\166\000d\160\160\001\000\166\001\000\223@\145\160\160\001\003\167\000e\160\160\001\000\167\001\000\235@\145\160\160\001\003\168\000f\160\160\001\000\168\000p@\145\160\160\001\003\169\000g\160\160\001\000\169\001\000\251@\144\160\001\003\170\000h\145\160\160\001\003\171\000ietmappings.from_uni_list array);;
+ let adobe_standard_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\031\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdef\001 \025hijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\001 \024\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001\000\161\001\000\162\001\000\163\001 D\001\000\165\001\001\146\001\000\167\001\000\164g\001 \028\001\000\171\001 9\001 :\002\000\000\251\001\002\000\000\251\002\000\255\001 \019\001 \001 !\001\000\183\000\255\001\000\182\001 \"\001 \026\001 \030\001 \029\001\000\187\001 &\001 0\000\255\001\000\191\000\255\000`int array);;
+let adobe_standard_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\004K\000\000\000\000\000\000\004&\000\000\004&\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\174\144\160\002\000\000\251\002\001\000\175@@@@@@@@@@@@@@@@\144\160\001 \019\001\000\177\144\160\001 \020\001\000\208@@@\144\160\001 \024\000`\144\160\001 \025g\144\160\001 \026\001\000\184@\144\160\001 \028\001\000\170\144\160\001 \029\001\000\186\144\160\001 \030\001\000\185@\145\160\160``\160\160\001 \001\000\178@\145\160\160aa\160\160\001 !\001\000\179@\145\160\160bb\160\160\001 \"\001\000\183@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160g\001\000\169\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\189@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\172@\145\160\160zz\160\160\001 :\001\000\173@\144\160{{\144\160||\144\160}}\144\160~~`\001\000\193\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~@@@@@@@@@@@@@@@@@@@\144\160\001\001\146\001\000\166@@@@@@@@@@@@@@\144\160\001\000\161\001\000\161\144\160\001\000\162\001\000\162\144\160\001\000\163\001\000\163\144\160\001\000\164\001\000\168\144\160\001\000\165\001\000\165@\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\200@\144\160\001\000\170\001\000\227\144\160\001\000\171\001\000\171@@@\144\160\001\000\175\001\000\197@@@@\14
4\160\001\000\180\001\000\194@\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\180\144\160\001\000\184\001\000\203@\144\160\001\000\186\001\000\235\144\160\001\000\187\001\000\187@@@\144\160\001\000\191\001\000\191@@@@@@\145\160\160\001\002\198\001\000\195\160\160\001\000\198\001\000\225@\144\160\001\002\199\001\000\207@@@@@@@@@@@@@@@@\145\160\160\001\002\216\001\000\198\160\160\001\000\216\001\000\233@\144\160\001\002\217\001\000\199\144\160\001\002\218\001\000\202\144\160\001\002\219\001\000\206\144\160\001\002\220\001\000\196\144\160\001\002\221\001\000\205@\144\160\001\000\223\001\000\251@@@@@@\144\160\001\000\230\001\000\241@@@@@@@@@@@@@@@@@\144\160\001\000\248\001\000\249@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let adobe_symbol_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\192\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`a\001\"\000c\001\"\003ef\001\"\011hi\001\"\023kl\001\"\018nopqrstuvwxyz{|}~\127\001\"E\001\003\145\001\003\146\001\003\167\001\003\148\001\003\149\001\003\166\001\003\147\001\003\151\001\003\153\001\003\209\001\003\154\001\003\155\001\003\156\001\003\157\001\003\159\001\003\160\001\003\152\001\003\161\001\003\163\001\003\164\001\003\165\001\003\194\001\003\169\001\003\158\001\003\168\001\003\150\000[\001\"4\000]\001\"\165\000_\002\000\000\248\229\001\003\177\001\003\178\001\003\199\001\003\180\001\003\181\001\003\198\001\003\179\001\003\183\001\003\185\001\003\213\001\003\186\001\003\187\001\003\188\001\003\189\001\003\191\001\003\192\001\003\184\001\003\193\001\003\195\001\003\196\001\003\197\001\003\214\001\003\201\001\003\190\001\003\200\001\003\182\000{\000|\000}\001\"<\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\001 \172\001\003\210\001 2\001\"d\001 D\001\"\030\001\001\146\001&c\001&f\001&e\001&`\001!\148\001!\144\001!\145\001!\146\001!\147\001\000\176\001\000\177\001 3\001\"e\001\000\215\001\"\029\001\"\002\001 \"\001\000\247\001\"`\001\"a\001\"H\001 &\002\000\000\248\230\002\000\000\248\231\001!\181\001!5\001!\017\001!\028\001!\024\001\"\151\001\"\149\001\"\005\001\")\001\"*\001\"\131\001\"\135\001\"\132\001\"\130\001\"\134\001\"\008\001\"\tint array);;
+let adobe_symbol_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\t\000\000\000\000\000\000\005)\000\000\005)\008\000\004\000\000\144\160\001\"\000b@\144\160\001\"\002\001\000\182\144\160\001\"\003d@\144\160\001\"\005\001\000\198@\144\160\001\"\007\001\000\209\144\160\001\"\008\001\000\206\144\160\001\"\t\001\000\207@\144\160\001\"\011g@@@\144\160\001\"\015\001\000\213@\145\160\160\001!\017\001\000\193\160\160\001\"\017\001\000\229@\144\160\001\"\018m@@@@\144\160\001\"\023j\144\160\001!\024\001\000\195@\144\160\001\"\026\001\000\214@\144\160\001!\028\001\000\194\144\160\001\"\029\001\000\181\144\160\001\"\030\001\000\165@\145\160\160``\160\160\001\" \001\000\208\160\160\001# \001\000\243@\145\160\160aa\160\160\001#!\001\000\245@\144\160\001 \"\001\000\183\144\160cc@\144\160ee\145\160\160ff\160\160\001 &\001\000\188@\144\160\001\"'\001\000\217\145\160\160hh\160\160\001\"(\001\000\218@\145\160\160ii\160\160\001\")\001\000\199\160\160\001#)\001\000\225@\145\160\160\001\"*\001\000\200\160\160\001#*\001\000\241@\145\160\160kk\160\160\001\"+\001\000\242@\144\160ll@\144\160nn\144\160oo\144\160pp\144\160qq\145\160\160rr\160\160\001 2\001\000\162@\145\160\160ss\160\160\001 3\001\000\178@\145\160\160tt\160\160\001\"4\000\\@\145\160\160uu\160\160\001!5\001\000\192@\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\145\160\160||\160\160\001\"<\000~@\144\160}}\144\160~~\144\160\127\127@@@@\144\160\001 
D\001\000\164\144\160\001\"E\000@@@\144\160\001\"H\001\000\187@@@@@@@@@@@@@@@@@@\144\160\000[\000[@\144\160\000]\000]@\144\160\000_\000_\145\160\160\001&`\001\000\170\160\160\001\"`\001\000\185@\144\160\001\"a\001\000\186@\144\160\001&c\001\000\167\144\160\001\"d\001\000\163\145\160\160\001&e\001\000\169\160\160\001\"e\001\000\179@\144\160\001&f\001\000\168@@@@@@@@@@@@@@@@@@@@\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}@@@@\144\160\001\"\130\001\000\204\144\160\001\"\131\001\000\201\144\160\001\"\132\001\000\203@\144\160\001\"\134\001\000\205\144\160\001\"\135\001\000\202@@@@@@@@\144\160\001!\144\001\000\172\145\160\160\001\003\145\000A\160\160\001!\145\001\000\173@\145\160\160\001\003\146\000B\160\160\001\001\146\001\000\166\160\160\001!\146\001\000\174@\145\160\160\001\003\147\000G\160\160\001!\147\001\000\175@\145\160\160\001\003\148\000D\160\160\001!\148\001\000\171@\145\160\160\001\003\149\000E\160\160\001\"\149\001\000\197@\144\160\001\003\150\000Z\145\160\160\001\003\151\000H\160\160\001\"\151\001\000\196@\144\160\001\003\152\000Q\144\160\001\003\153\000I\144\160\001\003\154\000K\144\160\001\003\155\000L\144\160\001\003\156\000M\144\160\001\003\157\000N\144\160\001\003\158\000X\144\160\001\003\159\000O\144\160\001\003\160\000P\144\160\001\003\161\000R@\144\160\001\003\163\000S\144\160\001\003\164\000T\145\160\160\001\003\165\000U\160\160\001\"\165\000^@\144\160\001\003\166\000F\144\160\001\003\167\000C\144\160\001\003\168\000Y\144\160\001\003\169\000W@@\145\160\160\001 
\172\001\000\160\160\160\001\000\172\001\000\216@@@@\144\160\001\000\176\001\000\176\145\160\160\001\003\177\000a\160\160\001\000\177\001\000\177@\144\160\001\003\178\000b\144\160\001\003\179\000g\144\160\001\003\180\000d\145\160\160\001\003\181\000e\160\160\001!\181\001\000\191@\144\160\001\003\182\000z\144\160\001\003\183\000h\144\160\001\003\184\000q\144\160\001\003\185\000i\144\160\001\003\186\000k\144\160\001\003\187\000l\144\160\001\003\188\000m\144\160\001\003\189\000n\144\160\001\003\190\000x\144\160\001\003\191\000o\144\160\001\003\192\000p\144\160\001\003\193\000r\144\160\001\003\194\000V\144\160\001\003\195\000s\144\160\001\003\196\000t\145\160\160\001\003\197\000u\160\160\001\"\197\001\000\215@\144\160\001\003\198\000f\144\160\001\003\199\000c\144\160\001\003\200\000y\144\160\001\003\201\000w\144\160\001%\202\001\000\224@@@@@\144\160\001!\208\001\000\220\145\160\160\001\003\209\000J\160\160\001!\209\001\000\221@\145\160\160\001\003\210\001\000\161\160\160\001!\210\001\000\222@\144\160\001!\211\001\000\223\144\160\001!\212\001\000\219\144\160\001\003\213\000j\144\160\001\003\214\000v\144\160\001\000\215\001\000\180@\144\160\002\000\000\246\217\001\000\211\144\160\002\000\000\246\218\001\000\210\144\160\002\000\000\246\219\001\000\212@@@@@@@@@\144\160\002\000\000\248\229\000`etmappings.from_uni_list array);;
+ let adobe_zapf_dingbats_encoding_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\233\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`\001'\001\001'\002\001'\003\001'\004\001&\014\001'\006\001'\007\001'\008\001'\ta\001'b\001'c\001'd\001'e\001'f\001'g\001&c\001&f\001&e\001&`\001$`\001$a\001$b\001$c\001$d\001$e\001$f\001$g\001$h\001$i\001'v\001'w\001'x\001'y\001'z\001'{\001'|\001'}\001'~\001'\127\001'\128\001'\129\001'\130\001'\131\001'\132\001'\133\001'\134\001'\135\001'\136\001'\137\001'\138\001'\139\001'\140\001'\141\001'\142\001'\143\001'\144\001'\145\001'\146\001'\147\001'\148\001!\146\001!\148\001!\149\001'\152\001'\153\001'\154\001'\155\001'\156\001'\157\001'\158\001'\159\001'\160\001'\161\001'\162\001'\163\001'\164\001'\165\001'\166\001'\167\001'\168\001'\169\001'\170\001'\171\001'\172\001'\173\001'\174\001'\175\000\255\001'\177\001'\178\001'\179\001'\180\001'\181\001'\182\001'\183\001'\184\001'\185\001'\186\001'\187\001'\188\001'\189\001'\190\000\255" 0 : int array);;
+let adobe_zapf_dingbats_encoding_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006_\000\000\000\000\000\000\005>\000\000\005>\008\000\004\000\000@\144\160\001'\001a\144\160\001'\002b\144\160\001'\003c\144\160\001'\004d\144\160\001&\005\000H\144\160\001'\006f\144\160\001'\007g\144\160\001'\008h\144\160\001'\ti@@\144\160\001'\012l\144\160\001'\013m\145\160\160\001&\014e\160\160\001'\014n@\144\160\001'\015o\144\160\001'\016p\144\160\001'\017q\144\160\001'\018r\144\160\001'\019s\144\160\001'\020t\144\160\001'\021u\144\160\001'\022v\144\160\001'\023w\144\160\001'\024x\144\160\001'\025y\144\160\001'\026z\145\160\160\001&\027j\160\160\001'\027{@\144\160\001'\028|\144\160\001'\029}\145\160\160\001&\030k\160\160\001'\030~@\144\160\001'\031\127\145\160\160```\144\160\001'A\000a\144\160\001'B\000b\144\160\001'C\000c\144\160\001'D\000d\144\160\001'E\000e\144\160\001'F\000f\144\160\001'G\000g\144\160\001'H\000h\144\160\001'I\000i\144\160\001'J\000j\144\160\001'K\000k@\144\160\001'M\000m@\144\160\001'O\000o\144\160\001'P\000p\144\160\001'Q\000q\144\160\001'R\000r@@@\144\160\001'V\000v@\144\160\001'X\000x\144\160\001'Y\000y\144\160\001'Z\000z\144\160\001'[\000{\144\160\001'\\\000|\144\160\001']\000}\144\160\001'^\000~@\145\160\160\001&`\001\000\171\160\160\001$`\001\000\172@\145\160\160\001'a\001\000\161\160\160\001$a\001\000\173@\145\160\160\001'b\001\000\162\160\160\001$b\001\000\174@\145\160\160\001'c\001\000\163\160\160\001&c\001\000\168\160\160\001$c\001\000\175@\145\160\160\001'd\001\000\164\160\160\001$d\001\000\176@\145\160\160\001'e\001\000\165\160\160\001&e\001\000\170\160\160\001$e\001\000\177@\145\160\160\001'f\001\000\166\160\160\001&f\001\000\169\160\160\001$f\001\000\178@\145\160\160\001'g\001\000\167\160\160\001$g\001\000\179@\144\160\001$h\001\000\180\144\160\001$i\001\000\181@@@@@@@@@@@@\144\160\001'v\001\000\182\144\160\001'w\001\000\183\144\160\001'x\001\000\184\144\160\001'y\001\000\185\144\160\001'z\001\000\186\144\160\001'{\001\000\187\
144\160\001'|\001\000\188\144\160\001'}\001\000\189\144\160\001'~n\160\160\001'\160\001\000\224@\144\160\001'\161\001\000\225\144\160\001'\162\001\000\226\144\160\001'\163\001\000\227\144\160\001'\164\001\000\228\144\160\001'\165\001\000\229\144\160\001'\166\001\000\230\144\160\001'\167\001\000\231\144\160\001'\168\001\000\232\144\160\001'\169\001\000\233\144\160\001'\170\001\000\234\144\160\001'\171\001\000\235\144\160\001'\172\001\000\236\144\160\001'\173\001\000\237\144\160\001'\174\001\000\238\144\160\001'\175\001\000\239@\144\160\001'\177\001\000\241\145\160\160\001%\178\000s\160\160\001'\178\001\000\242@\144\160\001'\179\001\000\243\144\160\001'\180\001\000\244\144\160\001'\181\001\000\245\144\160\001'\182\001\000\246\144\160\001'\183\001\000\247\144\160\001'\184\001\000\248\144\160\001'\185\001\000\249\144\160\001'\186\001\000\250\144\160\001'\187\001\000\251\145\160\160\001%\188\000t\160\160\001'\188\001\000\252@\144\160\001'\189\001\000\253\144\160\001'\190\001\000\254@@@@@@@\144\160\001%\198\000u@@@@@@@@\144\160\001%\207\000l@@@@@@@\145\160\160\001%\215\000w\160\160\002\000\000\248\215\001\000\128@\144\160\002\000\000\248\216\001\000\129\144\160\002\000\000\248\217\001\000\130\144\160\002\000\000\248\218\001\000\131\144\160\002\000\000\248\219\001\000\132\144\160\002\000\000\248\220\001\000\133\144\160\002\000\000\248\221\001\000\134\144\160\002\000\000\248\222\001\000\135\144\160\002\000\000\248\223\001\000\136\144\160\002\000\000\248\224\001\000\137\144\160\002\000\000\248\225\001\000\138\144\160\002\000\000\248\226\001\000\139\144\160\002\000\000\248\227\001\000\140\144\160\002\000\000\248\228\001\000\141@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let jis0201_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002\164\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\001\000\165\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\001 >\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\002\000\000\255a\002\000\000\255b\002\000\000\255c\002\000\000\255d\002\000\000\255e\002\000\000\255f\002\000\000\255g\002\000\000\255h\002\000\000\255i\002\000\000\255j\002\000\000\255k\002\000\000\255l\002\000\000\255m\002\000\000\255n\002\000\000\255o\002\000\000\255p\002\000\000\255q\002\000\000\255r\002\000\000\255s\002\000\000\255t\002\000\000\255u\002\000\000\255v\002\000\000\255w\002\000\000\255x\002\000\000\255y\002\000\000\255z\002\000\000\255{\002\000\000\255|\002\000\000\255}\002\000\000\255~int array);;
+let jis0201_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0053\000\000\000\000\000\000\004\143\000\000\004\143\008\000\004\000\000@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160``\144\160aa\144\160bb\144\160cc\144\160dd\144\160ee\144\160ff\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\144\160pp\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\144\160yy\144\160zz\144\160{{\144\160||\144\160}}\145\160\160~~\160\160\001 >\000~@\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[@\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\145\160\160\000a\000a\160\160\002\000\000\255a\001\000\161@\145\160\160\000b\000b\160\160\002\000\000\255b\001\000\162@\145\160\160\000c\000c\160\160\002\000\000\255c\001\000\163@\145\160\160\000d\000d\160\160\002\000\000\255d\001\000\164@\145\160\160\000e\000e\160\160\002\000\000\255e\001\000\165@\145\160\160\000f\000f\160\160\002\000\000\255f\001\000\166@\145\160\160\000g\000g\160\160\002\000\000\255g\001\000\167@\145\160\160\000h\000h\160\160\002\000\000\255h\001\000\168@\145\160\160\000i\000i\160\160\002\000\000\255i\001\000\169@\145\160\160\000j\000j\160\160\002\000\000\255j\001\000\170@\145\160\160\000k\000k\160\160\002\000\000\255k\001\000\171@\145\160\160\000l\000l\160\160\002\000\000\255l\001\000\172@\145\160\160\000m\000m\160\160\002\000\000\255m\001\000\173@\145\160\160\000n\000n\160\160\002\000\000\255n\001\000\174@\145\160\160\000o\000o\160\160\002\000\000\255o\0
01\000\175@\145\160\160\000p\000p\160\160\002\000\000\255p\001\000\176@\145\160\160\000q\000q\160\160\002\000\000\255q\001\000\177@\145\160\160\000r\000r\160\160\002\000\000\255r\001\000\178@\145\160\160\000s\000s\160\160\002\000\000\255s\001\000\179@\145\160\160\000t\000t\160\160\002\000\000\255t\001\000\180@\145\160\160\000u\000u\160\160\002\000\000\255u\001\000\181@\145\160\160\000v\000v\160\160\002\000\000\255v\001\000\182@\145\160\160\000w\000w\160\160\002\000\000\255w\001\000\183@\145\160\160\000x\000x\160\160\002\000\000\255x\001\000\184@\145\160\160\000y\000y\160\160\002\000\000\255y\001\000\185@\145\160\160\000z\000z\160\160\002\000\000\255z\001\000\186@\145\160\160\000{\000{\160\160\002\000\000\255{\001\000\187@\145\160\160\000|\000|\160\160\002\000\000\255|\001\000\188@\145\160\160\000}\000}\160\160\002\000\000\255}\001\000\189@\144\160\002\000\000\255~\001\000\190\144\160\002\000\000\255\127\001\000\191\144\160\002\000\000\255\128\001\000\192\144\160\002\000\000\255\129\001\000\193\144\160\002\000\000\255\130\001\000\194\144\160\002\000\000\255\131\001\000\195\144\160\002\000\000\255\132\001\000\196\144\160\002\000\000\255\133\001\000\197\144\160\002\000\000\255\134\001\000\198\144\160\002\000\000\255\135\001\000\199\144\160\002\000\000\255\136\001\000\200\144\160\002\000\000\255\137\001\000\201\144\160\002\000\000\255\138\001\000\202\144\160\002\000\000\255\139\001\000\203\144\160\002\000\000\255\140\001\000\204\144\160\002\000\000\255\141\001\000\205\144\160\002\000\000\255\142\001\000\206\144\160\002\000\000\255\143\001\000\207\144\160\002\000\000\255\144\001\000\208\144\160\002\000\000\255\145\001\000\209\144\160\002\000\000\255\146\001\000\210\144\160\002\000\000\255\147\001\000\211\144\160\002\000\000\255\148\001\000\212\144\160\002\000\000\255\149\001\000\213\144\160\002\000\000\255\150\001\000\214\144\160\002\000\000\255\151\001\000\215\144\160\002\000\000\255\152\001\000\216\144\160\002\000\000\255\153\001\000\217\144\160\002\000\000\255\154\001
\000\218\144\160\002\000\000\255\155\001\000\219\144\160\002\000\000\255\156\001\000\220\144\160\002\000\000\255\157\001\000\221\144\160\002\000\000\255\158\001\000\222\144\160\002\000\000\255\159\001\000\223@@@@@\144\160\001\000\165\000\\@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let koi8r_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001%\000\001%\002\001%\012\001%\016\001%\020\001%\024\001%\028\001%$\001%,\001%4\001%<\001%\128\001%\132\001%\136\001%\140\001%\144\001%\145\001%\146\001%\147\001# \001%\160\001\"\025\001\"\026\001\"H\001\"d\001\"e\001\000\160\001#!\001\000\176\001\000\178\001\000\183\001\000\247\001%P\001%Q\001%R\001\004Q\001%S\001%T\001%U\001%V\001%W\001%X\001%Y\001%Z\001%[\001%\\\001%]\001%^\001%_\001%`\001%a\001\004\001\001%b\001%c\001%d\001%e\001%f\001%g\001%h\001%i\001%j\001%k\001%l\001\000\169\001\004N\001\0040\001\0041\001\004F\001\0044\001\0045\001\004D\001\0043\001\004E\001\0048\001\0049\001\004:\001\004;\001\004<\001\004=\001\004>\001\004?\001\004O\001\004@\001\004A\001\004B\001\004C\001\0046\001\0042\001\004L\001\004K\001\0047\001\004H\001\004M\001\004I\001\004G\001\004J\001\004.\001\004\016\001\004\017\001\004&\001\004\020\001\004\021\001\004$\001\004\019\001\004%\001\004\024\001\004\025\001\004\026\001\004\027\001\004\028\001\004\029\001\004\030\001\004\031\001\004/\001\004 \001\004!\001\004\"\001\004#\001\004\022\001\004\018\001\004,\001\004+\001\004\023\001\004(\001\004-\001\004)\001\004'\001\004*" 0 : int array);;
+let koi8r_from_unicode = lazy (Marshal.from_string``\160\160\001# \001\000\147\160\160\001\004 \001\000\242@\145\160\160aa\160\160\001#!\001\000\155\160\160\001\004!\001\000\243@\145\160\160bb\160\160\001\004\"\001\000\244@\145\160\160cc\160\160\001\004#\001\000\245@\145\160\160dd\160\160\001%$\001\000\135\160\160\001\004$\001\000\230@\145\160\160ee\160\160\001\004%\001\000\232@\145\160\160ff\160\160\001\004&\001\000\227@\145\160\160gg\160\160\001\004'\001\000\254@\145\160\160hh\160\160\001\004(\001\000\251@\145\160\160ii\160\160\001\004)\001\000\253@\145\160\160jj\160\160\001\004*\001\000\255@\145\160\160kk\160\160\001\004+\001\000\249@\145\160\160ll\160\160\001%,\001\000\136\160\160\001\004,\001\000\248@\145\160\160mm\160\160\001\004-\001\000\252@\145\160\160nn\160\160\001\004.\001\000\224@\145\160\160oo\160\160\001\004/\001\000\241@\145\160\160pp\160\160\001\0040\001\000\193@\145\160\160qq\160\160\001\0041\001\000\194@\145\160\160rr\160\160\001\0042\001\000\215@\145\160\160ss\160\160\001\0043\001\000\199@\145\160\160tt\160\160\001%4\001\000\137\160\160\001\0044\001\000\196@\145\160\160uu\160\160\001\0045\001\000\197@\145\160\160vv\160\160\001\0046\001\000\214@\145\160\160ww\160\160\001\0047\001\000\218@\145\160\160xx\160\160\001\0048\001\000\201@\145\160\160yy\160\160\001\0049\001\000\202@\145\160\160zz\160\160\001\004:\001\000\203@\145\160\160{{\160\160\001\004;\001\000\204@\145\160\160||\160\160\001%<\001\000\138\160\160\001\004<\001\000\205@\145\160\160}}\160\160\001\004=\001\000\206@\145\160\160~~\160\160\001\004>\001\000\207@\145\160\160\127\127\160\160\001\004?\001\000\208@\145\160\160\000@\000@\160\160\001\004@\001\000\210@\145\160\160\000A\000A\160\160\001\004A\001\000\211@\145\160\160\000B\000B\160\160\001\004B\001\000\212@\145\160\160\000C\000C\160\160\001\004C\001\000\213@\145\160\160\000D\000D\160\160\001\004D\001\000\198@\145\160\160\000E\000E\160\160\001\004E\001\000\200@\145\160\160\000F\000F\160\160\001\004F\001\000\195@\145\160\160\000G\000G\160
\160\001\004G\001\000\222@\145\160\160\000H\000H\160\160\001\"H\001\000\151\160\160\001\004H\001\000\219@\145\160\160\000I\000I\160\160\001\004I\001\000\221@\145\160\160\000J\000J\160\160\001\004J\001\000\223@\145\160\160\000K\000K\160\160\001\004K\001\000\217@\145\160\160\000L\000L\160\160\001\004L\001\000\216@\145\160\160\000M\000M\160\160\001\004M\001\000\220@\145\160\160\000N\000N\160\160\001\004N\001\000\192@\145\160\160\000O\000O\160\160\001\004O\001\000\209@\145\160\160\000P\000P\160\160\001%P\001\000\160@\145\160\160\000Q\000Q\160\160\001%Q\001\000\161\160\160\001\004Q\001\000\163@\145\160\160\000R\000R\160\160\001%R\001\000\162@\145\160\160\000S\000S\160\160\001%S\001\000\164@\145\160\160\000T\000T\160\160\001%T\001\000\165@\145\160\160\000U\000U\160\160\001%U\001\000\166@\145\160\160\000V\000V\160\160\001%V\001\000\167@\145\160\160\000W\000W\160\160\001%W\001\000\168@\145\160\160\000X\000X\160\160\001%X\001\000\169@\145\160\160\000Y\000Y\160\160\001%Y\001\000\170@\145\160\160\000Z\000Z\160\160\001%Z\001\000\171@\145\160\160\000[\000[\160\160\001%[\001\000\172@\145\160\160\000\\\000\\\160\160\001%\\\001\000\173@\145\160\160\000]\000]\160\160\001%]\001\000\174@\145\160\160\000^\000^\160\160\001%^\001\000\175@\145\160\160\000_\000_\160\160\001%_\001\000\176@\145\160\160\000`\000`\160\160\001%`\001\000\177@\145\160\160\000a\000a\160\160\001%a\001\000\178@\145\160\160\000b\000b\160\160\001%b\001\000\180@\145\160\160\000c\000c\160\160\001%c\001\000\181@\145\160\160\000d\000d\160\160\001\"d\001\000\152\160\160\001%d\001\000\182@\145\160\160\000e\000e\160\160\001\"e\001\000\153\160\160\001%e\001\000\183@\145\160\160\000f\000f\160\160\001%f\001\000\184@\145\160\160\000g\000g\160\160\001%g\001\000\185@\145\160\160\000h\000h\160\160\001%h\001\000\186@\145\160\160\000i\000i\160\160\001%i\001\000\187@\145\160\160\000j\000j\160\160\001%j\001\000\188@\145\160\160\000k\000k\160\160\001%k\001\000\189@\145\160\160\000l\000l\160\160\001%l\001\000\190@\144\160\000m\000m\144\1
60\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127\144\160\001%\128\001\000\139@@@\144\160\001%\132\001\000\140@@@\144\160\001%\136\001\000\141@@@\144\160\001%\140\001\000\142@@@\144\160\001%\144\001\000\143\144\160\001%\145\001\000\144\144\160\001%\146\001\000\145\144\160\001%\147\001\000\146@@@@@@@@@@@@\145\160\160\001%\160\001\000\148\160\160\001\000\160\001\000\154@@@@@@@@@\144\160\001\000\169\001\000\191@@@@@@\144\160\001\000\176\001\000\156@\144\160\001\000\178\001\000\157@@@@\144\160\001\000\183\001\000\158@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\247\001\000\159@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let macroman_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002k\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255\000\255`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~`\001\000\198\001\000\216\001\"\030\001\000\177\001\"d\001\"e\001\000\165\001\000\181\001\"\002\001\"\017\001\"\015\001\003\192\001\"+\001\000\170\001\000\186\001\003\169\001\000\230\001\000\248\001\000\191\001\000\161\001\000\172\001\"\026\001\001\146\001\"H\001\"\006\001\000\171\001\000\187\001 &\001\000\160\001\000\192\001\000\195\001\000\213\001\001R\001\001S\001 \019\001 \020\001 \028\001 \029\001 \024\001 \025\001\000\247\001%\202\001\000\255\001\001x\001 D\001 \172\001 9\001 :\002\000\000\251\001\002\000\000\251\002\001 !\001\000\183\001 \026\001 \030\001 0\001\000\194\001\000\202\001\000\193\001\000\203\001\000\200\001\000\205\001\000\206\001\000\207\001\000\204\001\000\211\001\000\212\002\000\000\248\255\001\000\210\001\000\218\001\000\219\001\000\217\001\0011\001\002\198\001\002\220\001\000\175\001\002\216\001\002\217\001\002\218\001\000\184\001\002\221\001\002\219\001\002\199" 0 : int array);;
+let macroman_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\135\000\000\000\000\000\000\005\221\000\000\005\221\008\000\004\000\000@\144\160\002\000\000\251\001\001\000\222\145\160\160\001\"\002\001\000\182\160\160\002\000\000\251\002\001\000\223@@@@\144\160\001\"\006\001\000\198@@@@@@@@\144\160\001\"\015\001\000\184@\144\160\001\"\017\001\000\183@\144\160\001 \019\001\000\208\144\160\001 \020\001\000\209@@@\144\160\001 \024\001\000\212\144\160\001 \025\001\000\213\145\160\160\001\"\026\001\000\195\160\160\001 \026\001\000\226@@\144\160\001 \028\001\000\210\144\160\001 \029\001\000\211\145\160\160\001\"\030\001\000\176\160\160\001 \030\001\000\227@@\145\160\160``\160\160\001 \001\000\160@\145\160\160aa\160\160\001 !\001\000\224@\145\160\160bb\160\160\001 \"\001\000\165\160\160\001!\"\001\000\170@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\201@\144\160gg\144\160hh\144\160ii\144\160jj\145\160\160kk\160\160\001\"+\001\000\186@\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\228@\145\160\160qq\160\160\001\0011\001\000\245@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\220@\145\160\160zz\160\160\001 :\001\000\221@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\145\160\160\000D\000D\160\160\001 
D\001\000\218@\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\145\160\160\000H\000H\160\160\001\"H\001\000\197@\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\206@\145\160\160\000S\000S\160\160\001\001S\001\000\207@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\"`\001\000\173@\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\145\160\160\000d\000d\160\160\001\"d\001\000\178@\145\160\160\000e\000e\160\160\001\"e\001\000\179@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\217@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let windows1250_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\001\001`\001 9\001\001Z\001\001d\001\001}\001\001y\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\001\001a\001 :\001\001[\001\001e\001\001~\001\001z\001\000\160\001\002\199\001\002\216\001\001A\001\000\164\001\001\004\001\000\166\001\000\167\001\000\168\001\000\169\001\001^\001\000\171\001\000\172\001\000\173\001\000\174\001\001{\001\000\176\001\000\177\001\002\219\001\001B\001\000\180\001\000\181\001\000\182\001\000\183\001\000\184\001\001\005\001\001_\001\000\187\001\001=\001\002\221\001\001>\001\001|\001\001T\001\000\193\001\000\194\001\001\002\001\000\196\001\0019\001\001\006\001\000\199\001\001\012\001\000\201\001\001\024\001\000\203\001\001\026\001\000\205\001\000\206\001\001\014\001\001\016\001\001C\001\001G\001\000\211\001\000\212\001\001P\001\000\214\001\000\215\001\001X\001\001n\001\000\218\001\001p\001\000\220\001\000\221\001\001b\001\000\223\001\001U\001\000\225\001\000\226\001\001\003\001\000\228\001\001:\001\001\007\001\000\231\001\001\013\001\000\233\001\001\025\001\000\235\001\001\027\001\000\237\001\000\238\001\001\015\001\001\017\001\001D\001\001H\001\000\243\001\000\244\001\001Q\001\000\246\001\000\247\001\001Y\001\001o\001\000\250\001\001q\001\000\252\001\000\253\001\001c\001\002\217" 0 : int array);;
+let windows1250_from_unicode = lazy (Marshal.from_string``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0019\001\000\197@\145\160\160zz\160\160\001 :\001\000\155\160\160\001\001:\001\000\229@\144\160{{\144\160||\145\160\160}}\160\160\001\001=\001\000\188@\145\160\160~~`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\145\160\160\000b\000b\160\160\001\001b\001\000\222@\145\160\160\000c\000c\160\160\001\001c\001\000\254@\145\160\160\000d\000d\160\160\001\001d\001\000\141@\145\160\160\000e\000e\160\160\001\001e\001\000\157@\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\145\160\160\000n\000n\160\160\001\001n\001\000\217@\145\160\160\000o\000o\160\160\001\001o\001\000\249@\145\160\160\000p\000p\160\160\001\001p\001\000\219@\145\160\160\000q\000q\160\160\001\001q\001\000\251@\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\143@\145\160\160\000z\000z\160\160\001\001z\001\000\159@\145\160\160\000{\000{\160\160\001\001{\001\000\175@\145\160\160\000|\000|\160\160\001\001|\001\000\191@\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~\001\000\158@\144\160\000\127\000\127@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167\144\160\001\000\168\001\000\16
8\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\128\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@\144\160\001\000\180\001\000\180\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183\144\160\001\000\184\001\000\184@@\144\160\001\000\187\001\000\187@@@@@\144\160\001\000\193\001\000\193\144\160\001\000\194\001\000\194@\144\160\001\000\196\001\000\196@@\145\160\160\001\002\199\001\000\161\160\160\001\000\199\001\000\199@@\144\160\001\000\201\001\000\201@\144\160\001\000\203\001\000\203@\144\160\001\000\205\001\000\205\144\160\001\000\206\001\000\206@@@@\144\160\001\000\211\001\000\211\144\160\001\000\212\001\000\212@\144\160\001\000\214\001\000\214\144\160\001\000\215\001\000\215\144\160\001\002\216\001\000\162\144\160\001\002\217\001\000\255\144\160\001\000\218\001\000\218\144\160\001\002\219\001\000\178\144\160\001\000\220\001\000\220\145\160\160\001\002\221\001\000\189\160\160\001\000\221\001\000\221@@\144\160\001\000\223\001\000\223@\144\160\001\000\225\001\000\225\144\160\001\000\226\001\000\226@\144\160\001\000\228\001\000\228@@\144\160\001\000\231\001\000\231@\144\160\001\000\233\001\000\233@\144\160\001\000\235\001\000\235@\144\160\001\000\237\001\000\237\144\160\001\000\238\001\000\238@@@@\144\160\001\000\243\001\000\243\144\160\001\000\244\001\000\244@\144\160\001\000\246\001\000\246\144\160\001\000\247\001\000\247@@\144\160\001\000\250\001\000\250@\144\160\001\000\252\001\000\252\144\160\001\000\253\001\000\253@@" 0 : Netmappings.from_uni_list array);;
+ let windows1251_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002D\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001\004\002\001\004\003\001 \026\001\004S\001 \030\001 &\001 \001 !\001 \172\001 0\001\004\t\001 9\001\004\nint array);;
+let windows1251_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\007\166\000\000\000\000\000\000\007\129\000\000\007\129\008\000\004\000\000\144\160@@\145\160\160AA\160\160\001\004\001\001\000\168@\145\160\160BB\160\160\001\004\002\001\000\128@\145\160\160CC\160\160\001\004\003\001\000\129@\145\160\160DD\160\160\001\004\004\001\000\170@\145\160\160EE\160\160\001\004\005\001\000\189@\145\160\160FF\160\160\001\004\006\001\000\178@\145\160\160GG\160\160\001\004\007\001\000\175@\145\160\160HH\160\160\001\004\008\001\000\163@\145\160\160II\160\160\001\004\t\001\000\138@\145\160\160JJ\160\160\001\004\n``\160\160\001 \001\000\134\160\160\001\004 \001\000\208@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\004!\001\000\209@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\004\"\001\000\210@\145\160\160cc\160\160\001\004#\001\000\211@\145\160\160dd\160\160\001\004$\001\000\212@\145\160\160ee\160\160\001\004%\001\000\213@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\004&\001\000\214@\145\160\160gg\160\160\001\004'\001\000\215@\145\160\160hh\160\160\001\004(\001\000\216@\145\160\160ii\160\160\001\004)\001\000\217@\145\160\160jj\160\160\001\004*\001\000\218@\145\160\160kk\160\160\001\004+\001\000\219@\145\160\160ll\160\160\001\004,\001\000\220@\145\160\160mm\160\160\001\004-\001\000\221@\145\160\160nn\160\160\001\004.\001\000\222@\145\160\160oo\160\160\001\004/\001\000\223@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0040\001\000\224@\145\160\160qq\160\160\001\0041\001\000\225@\145\160\160rr\160\160\001\0042\001\000\226@\145\160\160ss\160\160\001\0043\001\000\227@\145\160\160tt\160\160\001\0044\001\000\228@\145\160\160uu\160\160\001\0045\001\000\229@\145\160\160vv\160\160\001\0046\001\000\230@\145\160\160ww\160\160\001\0047\001\000\231@\145\160\160xx\160\160\001\0048\001\000\232@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0049\001\000\233@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\004:\001\000\234@\145\160\160{{\160\160\001\004;\001\000\235@\145\160\160||\160\160\001\004<\001\000\236@\145\160\160}}\160\160\001\004=\001\000\237@\145\160\160~~\160\160\001\004>\001\000\238@\145\160\160\127\127\160\160\001\004?\001\000\239@\145\160\160\000@\000@\160\160\001\004@\001\000\240@\145\160\160\000A\000A\160\160\001\004A\001\000\241@\145\160\160\000B\000B\160\160\001\004B\001\000\242@\145\160\160\000C\000C\160\160\001\004C\001\000\243@\145\160\160\000D\000D\160\160\001\004D\001\000\244@\145\160\160\000E\000E\160\160\001\004E\001\000\245@\145\160\160\000F\000F\160\160\001\004F\001\000\246@\145\160\160\000G\000G\160\160\001\004G\001\000\247@\145\160\160\000H\000H\160\160\001\004H\001\000\248@\145\160\160\000I\000I\160\160\001\004I\001\000\249@\145\160\160\000J\000J\160\160\001\004J\001\000\250@\145\160\160\000K\000K\160\160\001\004K\001\000\251@\145\160\160\000L\000L\160\160\001\004L\001\000\252@\145\160\160\000M\000M\160\160\001\004M\001\000\253@\145\160\160\000N\000N\160\160\001\004N\001\000\254@\145\160\160\000O\000O\160\160\001\004O\001\000\255@\144\160\000P\000P\145\160\160\000Q\000Q\160\160\001\004Q\001\000\184@\145\160\160\000R\000R\160\160\001\004R\001\000\144@\145\160\160\000S\000S\160\160\001\004S\001\000\131@\145\160\160\000T\000T\160\160\001\004T\001\000\186@\145\160\160\000U\000U\160\160\001\004U\001\000\190@\145\160\160\000V\000V\160\160\001\004V\001\000\179@\145\160\160\000W\000W\160\160\001\004W\001\000\191@\145\160\160\000X\000X\160\160\001\004X\001\000\188@\145\160\160\000Y\000Y\160\160\001\004Y\001\000\154@\145\160\160\000Z\000Z\160\160\001\004Z\001\000\156@\145\160\160\000[\000[\160\160\001\004[\001\000\158@\145\160\160\000\\\000\\\160\160\001\004\\\001\000\157@\144\160\000]\000]\145\160\160\000^\000^\160\160\001\004^\001\000\162@\145\160\160\000_\000_\160\160\001\004_\001\000\159@\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\1
60\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~\144\160\000\127\000\127@@@@@@@@@@@@@@@@\144\160\001\004\144\001\000\165\144\160\001\004\145\001\000\180@@@@@@@@@@@@@@\144\160\001\000\160\001\000\160@@@\144\160\001\000\164\001\000\164@\144\160\001\000\166\001\000\166\144\160\001\000\167\001\000\167@\144\160\001\000\169\001\000\169@\144\160\001\000\171\001\000\171\145\160\160\001 \172\001\000\136\160\160\001\000\172\001\000\172@\144\160\001\000\173\001\000\173\144\160\001\000\174\001\000\174@\144\160\001\000\176\001\000\176\144\160\001\000\177\001\000\177@@@\144\160\001\000\181\001\000\181\144\160\001\000\182\001\000\182\144\160\001\000\183\001\000\183@@@\144\160\001\000\187\001\000\187@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" 0 : Netmappings.from_uni_list array);;
+ let windows1252_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002@\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\001\001}\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\001\001~\001\001xint array);;
+let windows1252_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\145\160\160\000}\000}\160\160\001\001}\001\000\142@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let windows1253_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0024\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let windows1253_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\130\000\000\000\000\000\000\006F\000\000\006F\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\145\160\160UU\160\160\001 \021\001\000\175@\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let windows1254_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002>\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\001`\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\001\001a\001 :\001\001S\000\255\000\255\001\001xint array);;
+let windows1254_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\168\000\000\000\000\000\000\006M\000\000\006M\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\144\160NN\144\160OO\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132\160\160\001\001\030\001\000\208@\145\160\160__\160\160\001\001\031\001\000\240@\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0010\001\000\221@\145\160\160qq\160\160\001\0011\001\000\253@\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\145\160\160\000^\000^\160\160\001\001^\001\000\222@\145\160\160\000_\000_\160\160\001\001_\001\000\254@\145\160\160\000`\000`\160\160\001\001`\001\000\138@\145\160\160\000a\000a\160\160\001\001a\001\000\154@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let windows1255_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002.\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~int array);;
+let windows1255_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006N\000\000\000\000\000\000\006\027\000\000\006\027\008\000\004\000\000\144\160@@\144\160AA\144\160BB\144\160CC\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\144\160II\144\160JJ\144\160KK\144\160LL\144\160MM\145\160\160NN\160\160\001 \014\001\000\253@\145\160\160OO\160\160\001 \015\001\000\254@\144\160PP\144\160QQ\144\160RR\145\160\160SS\160\160\001 \019\001\000\150@\145\160\160TT\160\160\001 \020\001\000\151@\144\160UU\144\160VV\144\160WW\145\160\160XX\160\160\001 \024\001\000\145@\145\160\160YY\160\160\001 \025\001\000\146@\145\160\160ZZ\160\160\001 \026\001\000\130@\144\160[[\145\160\160\\\\\160\160\001 \028\001\000\147@\145\160\160]]\160\160\001 \029\001\000\148@\145\160\160^^\160\160\001 \030\001\000\132@\144\160__\145\160\160``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\144\160cc\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 
:\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\144\160\000R\000R\144\160\000S\000S\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+ let windows1256_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002E\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\001\006~\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\001\006yint array);;
+let windows1256_from_unicode = lazy (Marshal.from_string``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135\160\160\001\006!\001\000\193@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\006\"\001\000\194@\145\160\160cc\160\160\001\006#\001\000\195@\145\160\160dd\160\160\001\006$\001\000\196@\145\160\160ee\160\160\001\006%\001\000\197@\145\160\160ff\160\160\001 &\001\000\133\160\160\001\006&\001\000\198@\145\160\160gg\160\160\001\006'\001\000\199@\145\160\160hh\160\160\001\006(\001\000\200@\145\160\160ii\160\160\001\006)\001\000\201@\145\160\160jj\160\160\001\006*\001\000\202@\145\160\160kk\160\160\001\006+\001\000\203@\145\160\160ll\160\160\001\006,\001\000\204@\145\160\160mm\160\160\001\006-\001\000\205@\145\160\160nn\160\160\001\006.\001\000\206@\145\160\160oo\160\160\001\006/\001\000\207@\145\160\160pp\160\160\001 0\001\000\137\160\160\001\0060\001\000\208@\145\160\160qq\160\160\001\0061\001\000\209@\145\160\160rr\160\160\001\0062\001\000\210@\145\160\160ss\160\160\001\0063\001\000\211@\145\160\160tt\160\160\001\0064\001\000\212@\145\160\160uu\160\160\001\0065\001\000\213@\145\160\160vv\160\160\001\0066\001\000\214@\145\160\160ww\160\160\001\0067\001\000\216@\145\160\160xx\160\160\001\0068\001\000\217@\145\160\160yy\160\160\001 9\001\000\139\160\160\001\0069\001\000\218@\145\160\160zz\160\160\001 
:\001\000\155\160\160\001\006:\001\000\219@\144\160{{\144\160||\144\160}}\144\160~~`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\006y\001\000\138@\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\145\160\160\000~\000~\160\160\001\006~etmappings.from_uni_list array);;
+ let windows1257_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\0029\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\000\255\001 \030\001 &\001 \001 !\000\255\001 0\000\255\001 9\000\255\001\000\168\001\002\199\001\000\184\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\000\255\001!\"\000\255\001 :\000\255\001\000\175\001\002\219\000\255\001\000\160\000\255\001\000\162\001\000\163\001\000\164\000\255\001\000\166\001\000\167\001\000\216\001\000\169\001\001V\001\000\171\001\000\172\001\000\173\001\000\174\001\000\198\001\000\176\001\000\177\001\000\178\001\000\179\001\000\180\001\000\181\001\000\182\001\000\183\001\000\248\001\000\185\001\001W\001\000\187\001\000\188\001\000\189\001\000\190\001\000\230\001\001\004\001\001.\001\001\000\001\001\006\001\000\196\001\000\197\001\001\024\001\001\018\001\001\012\001\000\201\001\001y\001\001\022\001\001\"\001\0016\001\001*\001\001;\001\001`\001\001C\001\001E\001\000\211\001\001L\001\000\213\001\000\214\001\000\215\001\001r\001\001A\001\001Z\001\001j\001\000\220\001\001{\001\001}\001\000\223\001\001\005\001\001/\001\001\001\001\001\007\001\000\228\001\000\229\001\001\025\001\001\019\001\001\013\001\000\233\001\001z\001\001\023\001\001#\001\0017\001\001+\001\001<\001\001a\001\001D\001\001F\001\000\243\001\001M\001\000\245\001\000\246\001\000\247\001\001s\001\001B\001\001[\001\001k\001\000\252\001\001|\001\001~\001\002\217" 0 : int array);;
+let windows1257_from_unicode = lazy (Marshal.from_string``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153\160\160\001\001\"\001\000\204@\145\160\160cc\160\160\001\001#\001\000\236@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\145\160\160jj\160\160\001\001*\001\000\206@\145\160\160kk\160\160\001\001+\001\000\238@\144\160ll\144\160mm\145\160\160nn\160\160\001\001.\001\000\193@\145\160\160oo\160\160\001\001/\001\000\225@\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\145\160\160vv\160\160\001\0016\001\000\205@\145\160\160ww\160\160\001\0017\001\000\237@\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\145\160\160{{\160\160\001\001;\001\000\207@\145\160\160||\160\160\001\001<\001\000\239@\144\160}}\144\160~~`\000`\160\160\001\001`\001\000\208@\145\160\160\000a\000a\160\160\001\001a\001\000\240@\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\145\160\160\000j\000j\160\160\001\001j\001\000\219@\145\160\160\000k\000k\160\160\001\001k\001\000\251@\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\144\160\000q\000q\145\160\160\000r\000r\160\160\001\001r\001\000\216@\145\160\160\000s\000s\160\160\001\001s\001\000\248@\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\144\160\000x\000x\145\160\160\000y\000y\160\160\001\001y\001\000\202@\145\160\160\000z\000z\160\160\001\001z\001\000\234@\145\160\160\000{\000{\160\160\001\001{\001\000\221@\145\160\160\000|\000|\160\160\001\001|\001\000\253@\145\160\160\000}\000}\160\160\001\001}\001\000\222@\145\160\160\000~\000~\160\160\001\001~etmappings.from_uni_list array);;
+ let windows1258_to_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\002<\000\000\000\000\000\000\001\001\000\000\001\001\008\000\004\000\000@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\127\000@\000A\000B\000C\000D\000E\000F\000G\000H\000I\000J\000K\000L\000M\000N\000O\000P\000Q\000R\000S\000T\000U\000V\000W\000X\000Y\000Z\000[\000\\\000]\000^\000_\000`\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000o\000p\000q\000r\000s\000t\000u\000v\000w\000x\000y\000z\000{\000|\000}\000~\000\127\001 \172\000\255\001 \026\001\001\146\001 \030\001 &\001 \001 !\001\002\198\001 0\000\255\001 9\001\001R\000\255\000\255\000\255\000\255\001 \024\001 \025\001 \028\001 \029\001 \"\001 \019\001 \020\001\002\220\001!\"\000\255\001 :\001\001S\000\255\000\255\001\001xtint array);;
+let windows1258_from_unicode = lazy (Marshal.from_string "\132\149\166\190\000\000\006\174\000\000\000\000\000\000\006^\000\000\006^\008\000\004\000\000\145\160\160@@\160\160\001\003\000\001\000\204@\145\160\160AA\160\160\001\003\001\001\000\236@\145\160\160BB\160\160\001\001\002\001\000\195@\145\160\160CC\160\160\001\003\003\001\000\222\160\160\001\001\003\001\000\227@\144\160DD\144\160EE\144\160FF\144\160GG\144\160HH\145\160\160II\160\160\001\003\t``\160\160\001 \001\000\134@\145\160\160aa\160\160\001 !\001\000\135@\145\160\160bb\160\160\001 \"\001\000\149\160\160\001!\"\001\000\153@\145\160\160cc\160\160\001\003#\001\000\242@\144\160dd\144\160ee\145\160\160ff\160\160\001 &\001\000\133@\144\160gg\144\160hh\144\160ii\144\160jj\144\160kk\144\160ll\144\160mm\144\160nn\144\160oo\145\160\160pp\160\160\001 0\001\000\137@\144\160qq\144\160rr\144\160ss\144\160tt\144\160uu\144\160vv\144\160ww\144\160xx\145\160\160yy\160\160\001 9\001\000\139@\145\160\160zz\160\160\001 :\001\000\155@\144\160{{\144\160||\144\160}}\144\160~~\144\160\127\127\144\160\000@\000@\144\160\000A\000A\144\160\000B\000B\144\160\000C\000C\144\160\000D\000D\144\160\000E\000E\144\160\000F\000F\144\160\000G\000G\144\160\000H\000H\144\160\000I\000I\144\160\000J\000J\144\160\000K\000K\144\160\000L\000L\144\160\000M\000M\144\160\000N\000N\144\160\000O\000O\144\160\000P\000P\144\160\000Q\000Q\145\160\160\000R\000R\160\160\001\001R\001\000\140@\145\160\160\000S\000S\160\160\001\001S\001\000\156@\144\160\000T\000T\144\160\000U\000U\144\160\000V\000V\144\160\000W\000W\144\160\000X\000X\144\160\000Y\000Y\144\160\000Z\000Z\144\160\000[\000[\144\160\000\\\000\\\144\160\000]\000]\144\160\000^\000^\144\160\000_\000_\144\160\000`\000`\144\160\000a\000a\144\160\000b\000b\144\160\000c\000c\144\160\000d\000d\144\160\000e\000e\144\160\000f\000f\144\160\000g\000g\144\160\000h\000h\144\160\000i\000i\144\160\000j\000j\144\160\000k\000k\144\160\000l\000l\144\160\000m\000m\144\160\000n\000n\144\160\000o\000o\144\160\000p\000p\
144\160\000q\000q\144\160\000r\000r\144\160\000s\000s\144\160\000t\000t\144\160\000u\000u\144\160\000v\000v\144\160\000w\000w\145\160\160\000x\000x\160\160\001\001x\001\000\159@\144\160\000y\000y\144\160\000z\000z\144\160\000{\000{\144\160\000|\000|\144\160\000}\000}\144\160\000~\000~etmappings.from_uni_list array);;
+(* Register the conversion tables of this file with the two global
+ * registries of Netmappings.  For every encoding tag, the table mapping
+ * the encoding to Unicode is added to Netmappings.to_unicode first, and
+ * the reverse table is added to Netmappings.from_unicode afterwards.
+ * The tables are only passed on here; nothing is forced at this point.
+ *)
+let register enc to_uni from_uni =
+  Hashtbl.add Netmappings.to_unicode enc to_uni;
+  Hashtbl.add Netmappings.from_unicode enc from_uni
+in
+(* Windows code pages *)
+register `Enc_windows1258 windows1258_to_unicode windows1258_from_unicode;
+register `Enc_windows1257 windows1257_to_unicode windows1257_from_unicode;
+register `Enc_windows1256 windows1256_to_unicode windows1256_from_unicode;
+register `Enc_windows1255 windows1255_to_unicode windows1255_from_unicode;
+register `Enc_windows1254 windows1254_to_unicode windows1254_from_unicode;
+register `Enc_windows1253 windows1253_to_unicode windows1253_from_unicode;
+register `Enc_windows1252 windows1252_to_unicode windows1252_from_unicode;
+register `Enc_windows1251 windows1251_to_unicode windows1251_from_unicode;
+register `Enc_windows1250 windows1250_to_unicode windows1250_from_unicode;
+(* Miscellaneous encodings *)
+register `Enc_macroman macroman_to_unicode macroman_from_unicode;
+register `Enc_koi8r koi8r_to_unicode koi8r_from_unicode;
+register `Enc_jis0201 jis0201_to_unicode jis0201_from_unicode;
+(* Adobe encodings *)
+register `Enc_adobe_zapf_dingbats_encoding
+  adobe_zapf_dingbats_encoding_to_unicode
+  adobe_zapf_dingbats_encoding_from_unicode;
+register `Enc_adobe_symbol_encoding
+  adobe_symbol_encoding_to_unicode
+  adobe_symbol_encoding_from_unicode;
+register `Enc_adobe_standard_encoding
+  adobe_standard_encoding_to_unicode
+  adobe_standard_encoding_from_unicode;
+(* "cp" code pages *)
+register `Enc_cp875 cp875_to_unicode cp875_from_unicode;
+register `Enc_cp874 cp874_to_unicode cp874_from_unicode;
+register `Enc_cp869 cp869_to_unicode cp869_from_unicode;
+register `Enc_cp866 cp866_to_unicode cp866_from_unicode;
+register `Enc_cp865 cp865_to_unicode cp865_from_unicode;
+register `Enc_cp864 cp864_to_unicode cp864_from_unicode;
+register `Enc_cp863 cp863_to_unicode cp863_from_unicode;
+register `Enc_cp862 cp862_to_unicode cp862_from_unicode;
+register `Enc_cp861 cp861_to_unicode cp861_from_unicode;
+register `Enc_cp860 cp860_to_unicode cp860_from_unicode;
+register `Enc_cp857 cp857_to_unicode cp857_from_unicode;
+register `Enc_cp856 cp856_to_unicode cp856_from_unicode;
+register `Enc_cp855 cp855_to_unicode cp855_from_unicode;
+register `Enc_cp852 cp852_to_unicode cp852_from_unicode;
+register `Enc_cp850 cp850_to_unicode cp850_from_unicode;
+register `Enc_cp775 cp775_to_unicode cp775_from_unicode;
+register `Enc_cp737 cp737_to_unicode cp737_from_unicode;
+register `Enc_cp500 cp500_to_unicode cp500_from_unicode;
+register `Enc_cp437 cp437_to_unicode cp437_from_unicode;
+register `Enc_cp424 cp424_to_unicode cp424_from_unicode;
+register `Enc_cp1026 cp1026_to_unicode cp1026_from_unicode;
+register `Enc_cp1006 cp1006_to_unicode cp1006_from_unicode;
+register `Enc_cp037 cp037_to_unicode cp037_from_unicode;
+();;
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+type t =  (* state of one netstream: where the data comes from plus the window currently held in memory *)
+ { s_channel : in_channel;  (* channel that want_another_block reads from *)
+ s_maxlength : int option;  (* Some k: load at most k bytes in total; None: no limit *)
+ s_blocksize : int;  (* upper bound for the number of bytes loaded per want_another_block call *)
+ mutable s_current_length : int;  (* total number of bytes loaded so far *)
+ mutable s_at_eos : bool;  (* once true, want_another_block does not read any more *)
+ mutable s_win_pos : int;  (* starts at 0; increased by n on every "move s n" *)
+ mutable s_win_len : int;  (* number of bytes currently in the window *)
+ s_netbuf : Netbuffer.t;  (* buffer holding the bytes of the window *)
+ s_iobuf : string;  (* scratch buffer filled by "input" before the data is appended to s_netbuf *)
+ }
+;;
+
+
+(* Debugging aid: writes the state of the stream s to stdout, labelled
+ * with text, and flushes stdout.  Printed are the counters of s, the
+ * used length and the allocated size of the netbuffer, and the escaped
+ * contents of the netbuffer.
+ *)
+let dump s text =
+  let nb = s.s_netbuf in
+  let nb_length = Netbuffer.length nb in
+  let nb_size = String.length (Netbuffer.unsafe_buffer nb) in
+  let nb_text = String.escaped (Netbuffer.contents nb) in
+  print_string ("*** NETSTREAM DUMP " ^ text ^ "\n");
+  Printf.printf "current_length=%d at_eos=%b win_pos=%d win_len=%d\n"
+    s.s_current_length
+    s.s_at_eos
+    s.s_win_pos
+    s.s_win_len;
+  Printf.printf "netbuffer_length=%d netbuffer_size=%d\n" nb_length nb_size;
+  Printf.printf "netbuffer=\"%s\"\n" nb_text;
+  print_string "*** ---------------\n";
+  flush stdout
+;;
+
+
+(* Tries to append one more block to the window of s.  Nothing happens
+ * if s is already at the end of the stream.  Otherwise up to s_blocksize
+ * bytes are read (fewer if s_maxlength would be exceeded), appended to
+ * the netbuffer, and the length counters are increased accordingly.
+ * The stream is marked as being at EOS when fewer than s_blocksize bytes
+ * could be loaded in this step.
+ *)
+let want_another_block s =
+  if not s.s_at_eos then begin
+    (* Upper bound for the number of bytes to load in this step. *)
+    let limit =
+      match s.s_maxlength with
+        Some total -> min (total - s.s_current_length) s.s_blocksize
+      | None -> s.s_blocksize
+    in
+    (* "input" may deliver less than requested, so keep reading until
+     * limit bytes are in s_iobuf or "input" returns 0 (EOF).
+     *)
+    let filled = ref 0 in
+    let eof = ref false in
+    while not !eof && !filled < limit do
+      let got = input s.s_channel s.s_iobuf !filled (limit - !filled) in
+      if got > 0 then
+        filled := !filled + got
+      else
+        eof := true
+    done;
+    let n = !filled in
+    Netbuffer.add_sub_string s.s_netbuf s.s_iobuf 0 n;
+    s.s_win_len <- s.s_win_len + n;
+    s.s_current_length <- s.s_current_length + n;
+    (* A short block means that there is nothing more to load. *)
+    s.s_at_eos <- n < s.s_blocksize
+  end
+;;
+
+
+(* want s n: loads further blocks until the window of s contains at
+ * least n bytes or the end of the stream is reached.
+ *)
+let want s n =
+  let rec fill () =
+    if not s.s_at_eos && s.s_win_len < n then begin
+      want_another_block s;
+      fill ()
+    end
+  in
+  fill ()
+;;
+
+
+(* Makes sure that the window is at least two blocks long, unless the
+ * end of the stream is reached before.
+ *)
+let want_minimum s =
+  let two_blocks = 2 * s.s_blocksize in
+  want s two_blocks
+;;
+
+
+(* move s n: drops the first n bytes of the window, advances the window
+ * position by n, and then refills the window to its minimum length.
+ *)
+let move s n =
+  let buffer = s.s_netbuf in
+  (* Delete first: if this raises, the counters are left untouched. *)
+  Netbuffer.delete buffer 0 n;
+  s.s_win_len <- s.s_win_len - n;
+  s.s_win_pos <- s.s_win_pos + n;
+  want_minimum s
+;;
+
+
+(* create_from_channel ch maxlength blocksize:
+ * Creates a netstream that reads from the channel ch in blocks of at
+ * most blocksize bytes.  If maxlength is "Some k", at most k bytes are
+ * read from ch in total; if it is None, there is no such limit.
+ * The window is initially filled to its minimum length (two blocks), or
+ * up to the end of the stream if that comes first.
+ *
+ * Raises Invalid_argument if blocksize is not positive.  A block size
+ * of 0 must be rejected: reading 0 bytes always yields 0 bytes, and
+ * "0 < blocksize" would never hold, so the stream would never be marked
+ * as being at EOS and every "want s n" with n > 0 would loop forever.
+ *)
+let create_from_channel ch maxlength blocksize =
+  if blocksize <= 0 then
+    invalid_arg "Netstream.create_from_channel: blocksize must be positive";
+  let s =
+    { s_channel = ch;
+      s_maxlength = maxlength;
+      s_blocksize = blocksize;
+      s_current_length = 0;
+      s_at_eos = false;
+      s_win_pos = 0;
+      s_win_len = 0;
+      (* Room for the minimum window of two blocks. *)
+      s_netbuf = Netbuffer.create (2*blocksize);
+      (* Scratch buffer for one block. *)
+      s_iobuf = String.create blocksize;
+    }
+  in
+  want_minimum s;
+  s
+;;
+
+
+(* create_from_string str:
+ * Builds a netstream whose window already holds all of 'str'. The
+ * stream is at EOS from the beginning; the channel field is only a
+ * placeholder (stdin) and the I/O buffer is empty.
+ *)
+let create_from_string str =
+  let len = String.length str in
+  let window = Netbuffer.create len in
+  Netbuffer.add_string window str;
+  { s_channel = stdin;
+    s_maxlength = None;
+    s_blocksize = len;
+    s_netbuf = window;
+    s_iobuf = "";
+    s_current_length = len;
+    s_win_pos = 0;
+    s_win_len = len;
+    s_at_eos = true;
+  }
+;;
+
+
+(* Read-only accessors for the fields of a netstream: *)
+
+(* The block size the stream was created with *)
+let block_size stream = stream.s_blocksize;;
+
+(* Number of bytes loaded so far *)
+let current_length stream = stream.s_current_length;;
+
+(* Whether the end of the stream has been reached *)
+let at_eos stream = stream.s_at_eos;;
+
+(* Absolute position of the first byte of the window *)
+let window_position stream = stream.s_win_pos;;
+
+(* Number of bytes in the window *)
+let window_length stream = stream.s_win_len;;
+
+(* The buffer holding the window (the stream's own buffer, not a copy) *)
+let window stream = stream.s_netbuf;;
+
+(* print_stream s:
+ * Prints a one-line summary of 's' (window position/length, total
+ * length, EOS flag) with the Format module.
+ *)
+let print_stream s =
+  let pos = s.s_win_pos
+  and len = s.s_win_len
+  and total = s.s_current_length
+  and eos = s.s_at_eos in
+  Format.printf
+    "<NETSTREAM window:%d/%d total_length:%d eof=%b>"
+    pos len total eos
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* A netstream is an input channel that is read block by block. The
+ * fragment of the channel currently loaded into memory is called the
+ * current window of the netstream.
+ *
+ * PICTURE:
+ *
+ * 0 window_position current_length EOS
+ * +------------------+-------------------+--------------------------+
+ * ====================
+ * The current window
+ *
+ * window_length = current_length - window_position
+ *
+ * The length of the window is maintained automatically. If possible,
+ * the window is at least twice the block size long, where a "block" is
+ * the amount of data that is read from the input channel in one step.
+ *
+ * (The idea is that you choose as block size the number of bytes you want
+ * to analyze at once, and which must be loaded into memory. You can start
+ * your analysis at window_position and proceed until window_position +
+ * blocksize without having to check whether your window is large enough.
+ * Only when the first blocksize bytes of the window are already processed,
+ * the window must be enlarged by loading the next block.)
+ *
+ * If you want the window to become larger, you can call 'want' (to
+ * enlarge the window to a certain size) or 'want_another_block' (to load
+ * just another block from the input channel). Note that this affects only
+ * the current window and not future windows.
+ *
+ * If you do not need the first n bytes of the window anymore, you can
+ * call 'move' to move the beginning of the window by n bytes. If the
+ * window becomes too small after this operation, it is enlarged until
+ * it has twice the block size or until it reaches EOS.
+ *)
+
+type t
+ (* The abstract type of netstreams. *)
+
+val create_from_channel : in_channel -> int option -> int -> t
+ (* create_from_channel ch maxlength blocksize:
+ * The new netstream reads from the channel 'ch'. If maxlength = None,
+ * the channel is read until EOF. If maxlength = Some n, at most n bytes
+ * are read; i.e. the netstream reads until n bytes have been read or
+ * until EOF has been reached, whichever comes first. The blocksize
+ * specifies the number of bytes to read at once.
+ * The initial window is loaded immediately, i.e. this function already
+ * reads from 'ch'.
+ *)
+
+val create_from_string : string -> t
+ (* Creates a new netstream from a string. The initial window of this
+ * netstream is a copy of the passed string, and the stream is at EOS
+ * from the beginning.
+ *)
+
+val block_size : t -> int
+ (* Returns the (immutable) block size. *)
+
+val current_length : t -> int
+ (* Returns the number of bytes read so far. *)
+
+val at_eos : t -> bool
+ (* True iff EOS (end of stream) is reached, i.e. the last byte of the
+ * window is the last byte of the stream.
+ *)
+
+val window_position : t -> int
+ (* Returns the absolute position of the current window. *)
+
+val window_length : t -> int
+ (* Returns the length of the current window. *)
+
+val window : t -> Netbuffer.t
+ (* Returns the current window. This is the buffer the netstream itself
+ * works on, not a copy.
+ *)
+
+val move : t -> int -> unit
+ (* move s n:
+ * Moves the window: The first n bytes of the current window are
+ * discarded. If the window would become smaller than twice the
+ * blocksize and if the end of the stream is not yet reached, further
+ * blocks are read from the input channel and appended to the window
+ * until it is at least twice the blocksize long or EOS is reached.
+ *
+ * PRECONDITION:
+ * - n <= window_length
+ *)
+
+val want : t -> int -> unit
+ (* want s n:
+ * If the window is smaller than n bytes, an attempt is made to enlarge
+ * the window such that it is at least n bytes long. The enlargement
+ * is not possible if the stream is not long enough; in this case
+ * the window becomes as large as possible.
+ *)
+
+val want_another_block : t -> unit
+ (* Enlarges the window by another block (if possible i.e. if the stream
+ * is long enough).
+ *)
+
+val print_stream : t -> unit
+ (* Prints a one-line summary of the netstream (window position, window
+ * length, total length, EOS flag) using the Format module. Netstring_top
+ * installs this function as toploop printer.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/24 20:20:33 gerd
+ * Added the toploop printer.
+ *
+ * Revision 1.1 2000/04/15 13:07:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Initialize multi-threading mode: *)
+
+(* One mutex per module that keeps global state: *)
+let str_mutex = Mutex.create ();;
+let cgi_mutex = Mutex.create ();;
+let mappings_mutex = Mutex.create ();;
+
+(* Hand every module a lock/unlock pair operating on its own mutex: *)
+let acquire m () = Mutex.lock m
+and release m () = Mutex.unlock m in
+Netstring_str.init_mt (acquire str_mutex) (release str_mutex);
+Cgi.init_mt (acquire cgi_mutex) (release cgi_mutex);
+Netmappings.init_mt (acquire mappings_mutex) (release mappings_mutex)
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/29 00:45:42 gerd
+ * Initializing Netmappings, too
+ *
+ * Revision 1.1 2000/06/25 21:15:27 gerd
+ * Initial revision
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module initializes the multi-threading mode of
+ * Netstring. You must link it with every application that
+ * uses multi-threading.
+ * PITFALL: Link this module _directly_ with the executable,
+ * _don't_ put this module into a cma archive! This would not work!
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/06/25 21:15:27 gerd
+ * Initial revision
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Locking hooks. Both are no-ops until 'init_mt' installs real ones. *)
+let lock = ref (fun () -> ());;
+let unlock = ref (fun () -> ());;
+
+(* init_mt new_lock new_unlock:
+ * Installs the functions that 'protect' calls before and after every
+ * protected operation.
+ *)
+let init_mt new_lock new_unlock =
+  lock := new_lock;
+  unlock := new_unlock
+;;
+
+(* protect f:
+ * Calls the lock hook, runs f(), and calls the unlock hook exactly once,
+ * no matter whether f returns normally or raises. The result of f is
+ * returned; an exception raised by f is re-raised after unlocking.
+ *)
+let protect f =
+  !lock();
+  (* Only f itself is guarded by the handler. The normal-path unlock is
+   * done outside of the 'try', so an exception raised by the unlock hook
+   * cannot trigger a second unlock.
+   *)
+  let r =
+    try
+      f()
+    with
+      x ->
+        !unlock();
+        raise x
+  in
+  !unlock();
+  r
+;;
+
+(* Re-exported from Str so that values can be exchanged with code using
+ * Str directly: *)
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+
+(* A snapshot of the outcome of a match, taken by 'return_result' while
+ * the lock is held. A value of -1 means "no position available".
+ *)
+type result =
+ { pos : int; (* the position passed to 'return_result' *)
+ match_beg : int; (* start of the whole match, or -1 *)
+ match_end : int; (* end of the whole match, or -1 *)
+ group_beg : int array; (* index g: start of group g+1, or -1 *)
+ group_end : int array; (* index g: end of group g+1, or -1 *)
+ }
+;;
+
+(* Str.regexp, run under the lock *)
+let regexp s =
+  let compile () = Str.regexp s in
+  protect compile
+;;
+
+(* Str.regexp_case_fold, run under the lock *)
+let regexp_case_fold s =
+  let compile () = Str.regexp_case_fold s in
+  protect compile
+;;
+
+(* Str.quote, run under the lock *)
+let quote s =
+  let run () = Str.quote s in
+  protect run
+;;
+
+(* Str.regexp_string, run under the lock *)
+let regexp_string s =
+  let compile () = Str.regexp_string s in
+  protect compile
+;;
+
+(* Str.regexp_string_case_fold, run under the lock *)
+let regexp_string_case_fold s =
+  let compile () = Str.regexp_string_case_fold s in
+  protect compile
+;;
+
+(* return_result pos n_groups:
+ * Copies the outcome of the most recent Str match into a 'result'
+ * record: the bounds of the whole match and of the groups 1 to n_groups.
+ * Positions that Str cannot deliver are stored as -1. Must be called
+ * while the lock is held, directly after the matching function.
+ *)
+let return_result pos n_groups =
+  (* Str reports a group that did not take part in the match with
+   * Not_found, and a group number the regexp does not have with
+   * Invalid_argument. The latter is the normal case here because
+   * n_groups is only an upper bound (default 9). Both mean "no position".
+   *)
+  let position_or_none f =
+    try f() with Not_found | Invalid_argument _ -> -1
+  in
+  let r =
+    { pos = pos;
+      match_beg = position_or_none Str.match_beginning;
+      match_end = position_or_none Str.match_end;
+      group_beg = Array.create n_groups (-1);
+      group_end = Array.create n_groups (-1);
+    }
+  in
+  for g = 0 to n_groups - 1 do
+    (* Array index g holds group number g+1 *)
+    r.group_beg.(g) <- position_or_none (fun () -> Str.group_beginning (g+1));
+    r.group_end.(g) <- position_or_none (fun () -> Str.group_end (g+1));
+  done;
+  r
+;;
+
+(* Str.string_match under the lock; on success the match data is
+ * captured in a 'result'. *)
+let string_match ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       match Str.string_match pat s pos with
+         true  -> Some (return_result pos groups)
+       | false -> None
+    )
+;;
+
+(* Str.string_partial_match under the lock; on success the match data is
+ * captured in a 'result'. *)
+let string_partial_match ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       match Str.string_partial_match pat s pos with
+         true  -> Some (return_result pos groups)
+       | false -> None
+    )
+;;
+
+(* Str.search_forward under the lock. Returns the position found by Str
+ * together with the captured match data; exceptions of Str propagate. *)
+let search_forward ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       let found_at = Str.search_forward pat s pos in
+       let r = return_result pos groups in
+       (found_at, r)
+    )
+;;
+
+(* Str.search_backward under the lock. Returns the position found by Str
+ * together with the captured match data; exceptions of Str propagate. *)
+let search_backward ?(groups = 9) ~pat s ~pos =
+  protect
+    (fun () ->
+       let found_at = Str.search_backward pat s pos in
+       let r = return_result pos groups in
+       (found_at, r)
+    )
+;;
+
+(* matched_string result s:
+ * The substring of 's' covered by the whole match recorded in 'result'.
+ * Raises Not_found if the match bounds are not available.
+ *)
+let matched_string result s =
+  let b = result.match_beg
+  and e = result.match_end in
+  if b >= 0 && e >= 0 then
+    String.sub s b (e - b)
+  else
+    raise Not_found
+;;
+
+(* Start position of the whole match; Not_found if not available *)
+let match_beginning result =
+  let b = result.match_beg in
+  if b >= 0 then b else raise Not_found
+;;
+
+(* End position of the whole match; Not_found if not available *)
+let match_end result =
+  let e = result.match_end in
+  if e >= 0 then e else raise Not_found
+;;
+
+(* matched_group result n s:
+ * The substring of 's' matched by group n. Groups are numbered from 1;
+ * as in Str, group 0 denotes the whole match.
+ * Raises Not_found if n is negative, larger than the number of groups
+ * stored in 'result', or if the group did not take part in the match.
+ *)
+let matched_group result n s =
+  let gbeg, gend =
+    if n = 0 then
+      result.match_beg, result.match_end
+    else if n >= 1 && n <= Array.length result.group_beg then
+      (* group n is stored at array index n-1 *)
+      result.group_beg.(n-1), result.group_end.(n-1)
+    else
+      raise Not_found
+  in
+  if gbeg < 0 || gend < 0 then raise Not_found;
+  String.sub s gbeg (gend - gbeg)
+;;
+
+(* group_beginning result n:
+ * Start position of group n. Groups are numbered from 1; as in Str,
+ * group 0 denotes the whole match.
+ * Raises Not_found if n is negative, larger than the number of groups
+ * stored in 'result', or if the group did not take part in the match.
+ *)
+let group_beginning result n =
+  let gbeg =
+    if n = 0 then
+      result.match_beg
+    else if n >= 1 && n <= Array.length result.group_beg then
+      (* group n is stored at array index n-1 *)
+      result.group_beg.(n-1)
+    else
+      raise Not_found
+  in
+  if gbeg < 0 then raise Not_found else
+    gbeg
+;;
+
+(* group_end result n:
+ * End position of group n. Groups are numbered from 1; as in Str,
+ * group 0 denotes the whole match.
+ * Raises Not_found if n is negative, larger than the number of groups
+ * stored in 'result', or if the group did not take part in the match.
+ *)
+let group_end result n =
+  let gend =
+    if n = 0 then
+      result.match_end
+    else if n >= 1 && n <= Array.length result.group_end then
+      (* group n is stored at array index n-1 *)
+      result.group_end.(n-1)
+    else
+      raise Not_found
+  in
+  if gend < 0 then raise Not_found else
+    gend
+;;
+
+(* Str.global_replace, run under the lock *)
+let global_replace ~pat ~templ s =
+  let run () = Str.global_replace pat templ s in
+  protect run
+;;
+
+(* Str.replace_first, run under the lock *)
+let replace_first ~pat ~templ s =
+  let run () = Str.replace_first pat templ s in
+  protect run
+;;
+
+(* Str.global_substitute under the lock. For every match, 'subst' is
+ * called (while the lock is held) with the captured match data and the
+ * string argument supplied by Str. *)
+let global_substitute ?(groups = 9) ~pat ~subst s =
+  let run () =
+    Str.global_substitute
+      pat
+      (fun text ->
+         let r = return_result 0 groups in
+         subst r text)
+      s
+  in
+  protect run
+;;
+
+(* Str.substitute_first under the lock. 'subst' is called (while the lock
+ * is held) with the captured match data and the string argument
+ * supplied by Str. *)
+let substitute_first ?(groups = 9) ~pat ~subst s =
+  let run () =
+    Str.substitute_first
+      pat
+      (fun text ->
+         let r = return_result 0 groups in
+         subst r text)
+      s
+  in
+  protect run
+;;
+
+(* replace_matched: n/a *)
+
+(* Str.split, run under the lock *)
+let split ~sep s =
+  let run () = Str.split sep s in
+  protect run
+;;
+
+(* Str.bounded_split, run under the lock *)
+let bounded_split ~sep s ~max =
+  let run () = Str.bounded_split sep s max in
+  protect run
+;;
+
+(* Str.split_delim, run under the lock *)
+let split_delim ~sep s =
+  let run () = Str.split_delim sep s in
+  protect run
+;;
+
+(* Str.bounded_split_delim, run under the lock *)
+let bounded_split_delim ~sep s ~max =
+  let run () = Str.bounded_split_delim sep s max in
+  protect run
+;;
+
+(* Str.full_split, run under the lock *)
+let full_split ~sep s =
+  let run () = Str.full_split sep s in
+  protect run
+;;
+
+(* Str.bounded_full_split, run under the lock *)
+let bounded_full_split ~sep s ~max =
+  let run () = Str.bounded_full_split sep s max in
+  protect run
+;;
+
+(* The following functions are re-exported from Str unchanged, i.e. they
+ * are not routed through 'protect': *)
+let string_before = Str.string_before;;
+let string_after = Str.string_after;;
+let first_chars = Str.first_chars;;
+let last_chars = Str.last_chars;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.1 2000/06/25 20:48:19 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module is a version of Str with a thread-safe interface *)
+
+type regexp = Str.regexp;;
+type split_result = Str.split_result = Text of string | Delim of string;;
+ (* Both types are identical to their counterparts in Str. *)
+
+type result;;
+ (* The type of matching results. Unlike in Str, the outcome of a match
+ * is returned as a value instead of being kept in global state.
+ *)
+
+val regexp: string -> regexp
+val regexp_case_fold: string -> regexp
+val quote: string -> string
+val regexp_string: string -> regexp
+val regexp_string_case_fold: string -> regexp
+
+val string_match:
+ ?groups:int -> pat:regexp -> string -> pos:int -> result option
+val search_forward:
+ ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val search_backward:
+ ?groups:int -> pat:regexp -> string -> pos:int -> (int * result)
+val string_partial_match:
+ ?groups:int -> pat:regexp -> string -> pos:int -> result option
+
+(* The ~groups option specifies how many groups will be stored into
+ * 'result'. Default: 9
+ * The matching functions return None if the string does not match; the
+ * search functions return the found position together with the result.
+ *)
+
+val matched_string : result -> string -> string
+val match_beginning : result -> int
+val match_end : result -> int
+val matched_group : result -> int -> string -> string
+val group_beginning : result -> int -> int
+val group_end : result -> int -> int
+
+(* These functions extract information from a 'result'. The string
+ * argument must be the string that was matched. Groups are numbered
+ * starting at 1. Not_found is raised if the requested position is not
+ * available.
+ *)
+
+val global_replace: pat:regexp -> templ:string -> string -> string
+val replace_first: pat:regexp -> templ:string -> string -> string
+val global_substitute:
+ ?groups:int ->
+ pat:regexp -> subst:(result -> string -> string) -> string -> string
+val substitute_first:
+ ?groups:int ->
+ pat:regexp -> subst:(result -> string -> string) -> string -> string
+
+(* replace_matched: not available *)
+
+val split: sep:regexp -> string -> string list
+val bounded_split: sep:regexp -> string -> max:int -> string list
+val split_delim: sep:regexp -> string -> string list
+val bounded_split_delim: sep:regexp -> string -> max:int -> string list
+val full_split: sep:regexp -> string -> split_result list
+val bounded_full_split: sep:regexp -> string -> max:int -> split_result list
+
+val string_before: string -> int -> string
+val string_after: string -> int -> string
+val first_chars: string -> len:int -> string
+val last_chars: string -> len:int -> string
+
+(* Private: *)
+
+val init_mt : (unit -> unit) -> (unit -> unit) -> unit
+ (* init_mt lock unlock:
+ * Installs the functions that are called before and after every
+ * operation that goes through Str. Used by Netstring_mt.
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 21:15:48 gerd
+ * Checked thread-safety.
+ *
+ * Revision 1.1 2000/06/25 20:48:19 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(* exec s:
+ * Parses 's' as a toplevel phrase and executes it in the running
+ * toploop; diagnostics go to Format.err_formatter. Raises Assert_failure
+ * if the toploop reports that the phrase failed.
+ *)
+let exec s =
+  let l = Lexing.from_string s in
+  let ph = !Toploop.parse_toplevel_phrase l in
+  (* Execute the phrase outside of 'assert': when the module is compiled
+   * with -noassert, the argument of an assertion is not evaluated, and
+   * the phrase would silently never run.
+   *)
+  let ok = Toploop.execute_phrase false Format.err_formatter ph in
+  assert ok
+;;
+
+(* Install the printers: *)
+
+List.iter
+  exec
+  [ "#install_printer Neturl.print_url;;";
+    "#install_printer Netbuffer.print_buffer;;";
+    "#install_printer Netstream.print_stream;;";
+    "#install_printer Cgi.print_argument;;";
+  ]
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/06/25 22:34:43 gerd
+ * Added labels to arguments.
+ *
+ * Revision 1.1 2000/06/24 20:20:58 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* You may load this module into the toploop in order to install
+ * the printers for the various opaque data types of Netstring.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/06/25 22:53:45 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Raised whenever a URL or a part of it violates the syntax it is
+ * checked against. *)
+exception Malformed_URL
+
+(* How a URL syntax treats one part of the URL: *)
+type url_syntax_option =
+ Url_part_not_recognized (* the part must not occur *)
+ | Url_part_allowed (* the part may occur *)
+ | Url_part_required (* the part must occur *)
+
+
+(* A URL syntax states for every part of a URL whether it is recognized,
+ * whether characters >= 128 are tolerated, and carries an additional
+ * user-supplied validity check.
+ *)
+type url_syntax =
+ { url_enable_scheme : url_syntax_option;
+ url_enable_user : url_syntax_option;
+ url_enable_password : url_syntax_option;
+ url_enable_host : url_syntax_option;
+ url_enable_port : url_syntax_option;
+ url_enable_path : url_syntax_option;
+ url_enable_param : url_syntax_option;
+ url_enable_query : url_syntax_option;
+ url_enable_fragment : url_syntax_option;
+ url_enable_other : url_syntax_option;
+ url_accepts_8bits : bool;
+ url_is_valid : url -> bool;
+ }
+
+(* A URL split into its parts. Parts that are missing are None or the
+ * empty list. url_validity caches the result of a successful validation.
+ *)
+and url =
+ {
+ url_syntax : url_syntax;
+ mutable url_validity : bool;
+ url_scheme : string option;
+ url_user : string option;
+ url_password : string option;
+ url_host : string option;
+ url_port : int option;
+ url_path : string list;
+ url_param : string list;
+ url_query : string option;
+ url_fragment : string option;
+ url_other : string option;
+ }
+;;
+
+
+(* The role a character plays while a URL part is scanned (see
+ * scan_url_part): *)
+type char_category =
+ Accepted (* may occur inside the part *)
+ | Rejected (* must not occur (but see 'accept_8bits') *)
+ | Separator (* ends the part *)
+
+
+
+let scan_url_part s k_from k_to cats accept_8bits =
+  (* Scans the longest run of accepted characters in 's', beginning at
+   * position 'k_from' and going no further than position 'k_to'. The
+   * run must be ended either by a separator character or by 'k_to'.
+   * The result is the position following the last character of the run.
+   * Malformed_URL is raised if a rejected character is found before the
+   * run ends. Characters >= 128 are tolerated even if rejected provided
+   * that 'accept_8bits' is set.
+   * An accepted '%' must be followed by two hexadecimal digits which
+   * must both be accepted characters, too; otherwise Malformed_URL is
+   * raised.
+   * 'cats': maps every character code (0 to 255) to the category of the
+   *   character.
+   *)
+  assert (Array.length cats = 256);
+  assert (k_from >= 0);
+  assert (k_from <= k_to);
+  assert (k_to <= String.length s);
+
+  let category c = cats.(Char.code c) in
+
+  let is_hex_digit c =
+    match c with
+      ('0'..'9'|'A'..'F'|'a'..'f') -> true
+    | _ -> false
+  in
+
+  let require_hex c =
+    if category c <> Accepted || not (is_hex_digit c) then
+      raise Malformed_URL
+  in
+
+  let rec scan k =
+    if k >= k_to then
+      k
+    else begin
+      let c = s.[k] in
+      match category c with
+        Separator ->
+          k
+      | Rejected ->
+          if accept_8bits && c >= '\128'
+          then scan (k+1)
+          else raise Malformed_URL
+      | Accepted ->
+          if c <> '%' then
+            scan (k+1)
+          else begin
+            (* Both digits of the escape must lie before k_to *)
+            if k+2 >= k_to then raise Malformed_URL;
+            require_hex s.[k+1];
+            require_hex s.[k+2];
+            scan (k+3)
+          end
+    end
+  in
+
+  scan k_from
+;;
+
+
+(* Create a categorization: *)
+
+(* chars_between first last: the characters from 'first' to 'last'
+ * (inclusive) in ascending order *)
+let chars_between first last =
+  let rec collect code acc =
+    if code < Char.code first then acc
+    else collect (code - 1) (Char.chr code :: acc)
+  in
+  collect (Char.code last) []
+
+(* Lowercase letters *)
+let lalpha = chars_between 'a' 'z'
+
+(* Uppercase letters *)
+let ualpha = chars_between 'A' 'Z'
+
+(* Decimal digits *)
+let digit = chars_between '0' '9'
+
+let safe =
+  [ '$'; '-'; '_'; '.'; '+' ]
+
+let extra =
+  [ '!'; '*'; '\''; '('; ')'; ',' ]
+
+let make_cats accepted separators =
+  (* Builds a categorization table with 256 entries:
+   * - Characters occurring in 'separators' become separators.
+   * - Characters occurring in 'accepted' but not in 'separators' become
+   *   accepted characters.
+   * - Everything else is rejected.
+   *)
+  let cats = Array.make 256 Rejected in
+  let mark cat c = cats.(Char.code c) <- cat in
+  List.iter (mark Accepted) accepted;
+  (* Separators are marked last so that they win over 'accepted' *)
+  List.iter (mark Separator) separators;
+  cats
+;;
+
+
+(* scheme_cats: character categorization to _extract_ the URL scheme.
+ * Letters, '+', '-' and '.' are accepted; ':' ends the scheme.
+ * NOTE(review): digits are rejected here although RFC 1738 allows them
+ * in scheme names -- confirm whether this is intentional.
+ *)
+let scheme_cats =
+  let accepted = lalpha @ ualpha @ ['+'; '-'; '.'] in
+  make_cats accepted [':']
+;;
+
+
+(* login_cats: character categorization to _extract_ user name, password,
+ * host name, and port. The parts are ended by ':', '@', '/' or '#'.
+ *)
+let login_cats =
+  let accepted =
+    lalpha @ ualpha @ digit @ safe @ extra @ [';'; '?'; '&'; '='; '%']
+  and separators =
+    [':'; '@'; '/'; '#' ]
+  in
+  make_cats accepted separators
+;;
+
+(* host_cats: character categorization to _check_ whether the host name
+ * consists of legal characters only (letters, digits, '.', '-').
+ * Especially '%' is not allowed here!
+ *)
+let host_cats =
+  let accepted = lalpha @ ualpha @ digit @ ['.'; '-'] in
+  make_cats accepted []
+;;
+
+(* port_cats: character categorization to _check_ whether the port number
+ * consists of digits only.
+ * Especially '%' is not allowed here!
+ *)
+let port_cats =
+  let no_separators = [] in
+  make_cats digit no_separators
+;;
+
+(* path_cats separators: character categorization for paths where the
+ * characters in 'separators' end the path *)
+let path_cats separators =
+  let punctuation = ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'; '~'] in
+  let accepted = lalpha @ ualpha @ digit @ safe @ extra @ punctuation in
+  make_cats accepted separators
+;;
+
+
+(* separators_from_syntax syn:
+ * The characters that begin a URL clause recognized by 'syn': ';' for
+ * params, '?' for the query, '#' for the fragment (in this order).
+ *)
+let separators_from_syntax syn =
+  let separator_for (syn_option, c) =
+    match syn_option with
+      Url_part_not_recognized -> []
+    | Url_part_allowed
+    | Url_part_required -> [c]
+  in
+  List.concat
+    (List.map
+       separator_for
+       [ syn.url_enable_param, ';';
+         syn.url_enable_query, '?';
+         syn.url_enable_fragment, '#';
+       ])
+;;
+
+
+(* path_cats_from_syntax syn extraseps:
+ * Computes a character categorization to extract the path from a URL.
+ * It depends on the syntax because the possible separators are the
+ * characters that may begin the next URL clause; 'extraseps' are
+ * treated as separators in addition.
+ *
+ * Notes:
+ * - The '#' is rejected unless fragments are enabled.
+ * - The '~' is accepted although this violates RFC 1738.
+ *)
+let path_cats_from_syntax syn extraseps =
+  path_cats (separators_from_syntax syn @ extraseps)
+;;
+
+
+(* other_cats_from_syntax syn:
+ * Character categorization to extract or check the "other" part of the
+ * URL. There are no separators; the clause-introducing characters
+ * recognized by 'syn' count as accepted characters.
+ *)
+let other_cats_from_syntax syn =
+  let clause_chars = separators_from_syntax syn in
+  let punctuation = ['?'; ':'; '@'; '&'; '='; ';'; '%'; '/'] in
+  make_cats
+    (lalpha @ ualpha @ digit @ safe @ extra @ (clause_chars @ punctuation))
+    []
+;;
+
+
+
+(* extract_url_scheme s:
+ * Returns the scheme at the beginning of 's' in lowercase (without the
+ * ':'). Raises Malformed_URL if 's' does not begin with a scheme
+ * followed by ':'.
+ *)
+let extract_url_scheme s =
+  let len = String.length s in
+  let stop = scan_url_part s 0 len scheme_cats false in
+  (* or raise Malformed_URL *)
+  if stop = len then
+    (* the whole string was consumed: there is no ':' *)
+    raise Malformed_URL
+  else begin
+    assert (s.[stop] = ':');
+    String.lowercase (String.sub s 0 stop)
+  end
+;;
+
+
+(* implication: a => b is false only if a holds and b does not *)
+let ( => ) a b = if a then b else true;;
+
+(* equivalence: both operands have the same truth value *)
+let ( <=> ) (a:bool) b = if a then b else not b;;
+
+(* url_syntax_is_valid syn:
+ * Checks whether the syntax description is consistent:
+ * - a password requires that the user is recognized,
+ * - port and user both require that the host is recognized,
+ * - the "other" part excludes user, password, host, port and path.
+ *)
+let url_syntax_is_valid syn =
+  let recognized x = x <> Url_part_not_recognized in
+  let structured =
+    recognized syn.url_enable_user ||
+    recognized syn.url_enable_password ||
+    recognized syn.url_enable_host ||
+    recognized syn.url_enable_port ||
+    recognized syn.url_enable_path
+  in
+  (recognized syn.url_enable_password => recognized syn.url_enable_user) &&
+  (recognized syn.url_enable_port => recognized syn.url_enable_host) &&
+  (recognized syn.url_enable_user => recognized syn.url_enable_host) &&
+  not (structured && recognized syn.url_enable_other)
+;;
+
+
+(* partial_url_syntax syn:
+ * A copy of 'syn' in which every required part is only allowed, i.e.
+ * any recognized part may be missing. Everything else is unchanged.
+ *)
+let partial_url_syntax syn =
+  let relax opt =
+    match opt with
+      Url_part_required
+    | Url_part_allowed -> Url_part_allowed
+    | Url_part_not_recognized -> Url_part_not_recognized
+  in
+  { url_accepts_8bits = syn.url_accepts_8bits;
+    url_is_valid = syn.url_is_valid;
+    url_enable_scheme = relax syn.url_enable_scheme;
+    url_enable_user = relax syn.url_enable_user;
+    url_enable_password = relax syn.url_enable_password;
+    url_enable_host = relax syn.url_enable_host;
+    url_enable_port = relax syn.url_enable_port;
+    url_enable_path = relax syn.url_enable_path;
+    url_enable_param = relax syn.url_enable_param;
+    url_enable_query = relax syn.url_enable_query;
+    url_enable_fragment = relax syn.url_enable_fragment;
+    url_enable_other = relax syn.url_enable_other;
+  }
+;;
+
+
+
+(* Syntax of "file" URLs: scheme and path are required, a host may be
+ * given, nothing else is recognized. *)
+let file_url_syntax =
+  { (* required: *)
+    url_enable_scheme = Url_part_required;
+    url_enable_path = Url_part_required;
+    (* allowed: *)
+    url_enable_host = Url_part_allowed;
+    (* not recognized: *)
+    url_enable_user = Url_part_not_recognized;
+    url_enable_password = Url_part_not_recognized;
+    url_enable_port = Url_part_not_recognized;
+    url_enable_param = Url_part_not_recognized;
+    url_enable_query = Url_part_not_recognized;
+    url_enable_fragment = Url_part_not_recognized;
+    url_enable_other = Url_part_not_recognized;
+    (* character set and extra validation: *)
+    url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+  }
+;;
+
+
+(* Syntax of "ftp" URLs: scheme and host are required; user, password,
+ * port, path and params may be given. *)
+let ftp_url_syntax =
+  { (* required: *)
+    url_enable_scheme = Url_part_required;
+    url_enable_host = Url_part_required;
+    (* allowed: *)
+    url_enable_user = Url_part_allowed;
+    url_enable_password = Url_part_allowed;
+    url_enable_port = Url_part_allowed;
+    url_enable_path = Url_part_allowed;
+    url_enable_param = Url_part_allowed;
+    (* not recognized: *)
+    url_enable_query = Url_part_not_recognized;
+    url_enable_fragment = Url_part_not_recognized;
+    url_enable_other = Url_part_not_recognized;
+    (* character set and extra validation: *)
+    url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+  }
+;;
+
+
+(* Syntax of "http" URLs: scheme and host are required; user, password,
+ * port, path and query may be given. *)
+let http_url_syntax =
+  { (* required: *)
+    url_enable_scheme = Url_part_required;
+    url_enable_host = Url_part_required;
+    (* allowed: *)
+    url_enable_user = Url_part_allowed;
+    url_enable_password = Url_part_allowed;
+    url_enable_port = Url_part_allowed;
+    url_enable_path = Url_part_allowed;
+    url_enable_query = Url_part_allowed;
+    (* not recognized: *)
+    url_enable_param = Url_part_not_recognized;
+    url_enable_fragment = Url_part_not_recognized;
+    url_enable_other = Url_part_not_recognized;
+    (* character set and extra validation: *)
+    url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+  }
+;;
+
+
+(* Syntax of "mailto" URLs: only the scheme and the unstructured "other"
+ * part exist, and both are required. *)
+let mailto_url_syntax =
+  { (* required: *)
+    url_enable_scheme = Url_part_required;
+    url_enable_other = Url_part_required;
+    (* not recognized: *)
+    url_enable_user = Url_part_not_recognized;
+    url_enable_password = Url_part_not_recognized;
+    url_enable_host = Url_part_not_recognized;
+    url_enable_port = Url_part_not_recognized;
+    url_enable_path = Url_part_not_recognized;
+    url_enable_param = Url_part_not_recognized;
+    url_enable_query = Url_part_not_recognized;
+    url_enable_fragment = Url_part_not_recognized;
+    (* character set and extra validation: *)
+    url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+  }
+;;
+
+
+(* The syntax that recognizes no part at all *)
+let null_url_syntax =
+  { url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+    (* no part is recognized: *)
+    url_enable_scheme = Url_part_not_recognized;
+    url_enable_user = Url_part_not_recognized;
+    url_enable_password = Url_part_not_recognized;
+    url_enable_host = Url_part_not_recognized;
+    url_enable_port = Url_part_not_recognized;
+    url_enable_path = Url_part_not_recognized;
+    url_enable_param = Url_part_not_recognized;
+    url_enable_query = Url_part_not_recognized;
+    url_enable_fragment = Url_part_not_recognized;
+    url_enable_other = Url_part_not_recognized;
+  }
+;;
+
+
+(* A syntax in which every structured part may be given but none is
+ * required; only the unstructured "other" part is not recognized. *)
+let ip_url_syntax =
+  { url_accepts_8bits = false;
+    url_is_valid = (fun _ -> true);
+    (* allowed: *)
+    url_enable_scheme = Url_part_allowed;
+    url_enable_user = Url_part_allowed;
+    url_enable_password = Url_part_allowed;
+    url_enable_host = Url_part_allowed;
+    url_enable_port = Url_part_allowed;
+    url_enable_path = Url_part_allowed;
+    url_enable_param = Url_part_allowed;
+    url_enable_query = Url_part_allowed;
+    url_enable_fragment = Url_part_allowed;
+    (* not recognized: *)
+    url_enable_other = Url_part_not_recognized;
+  }
+;;
+
+
+(* Maps the names of the predefined schemes to their syntaxes *)
+let common_url_syntax =
+  let table = Hashtbl.create 10 in
+  List.iter
+    (fun (scheme, syntax) -> Hashtbl.add table scheme syntax)
+    [ "file", file_url_syntax;
+      "ftp", ftp_url_syntax;
+      "http", http_url_syntax;
+      "mailto", mailto_url_syntax;
+    ];
+  table
+;;
+
+
+(* url_conforms_to_syntax url:
+ * Checks that 'url' contains exactly the parts its syntax permits (no
+ * part that is not recognized, every part that is required) and that
+ * the URL is either already marked as valid or passes the url_is_valid
+ * function of the syntax.
+ *)
+let url_conforms_to_syntax url =
+  let syn = url.url_syntax in
+  (* fits opt is_present: whether presence/absence of a part agrees with
+   * the syntax option for that part *)
+  let fits opt is_present =
+    match opt with
+      Url_part_not_recognized -> not is_present
+    | Url_part_allowed -> true
+    | Url_part_required -> is_present
+  in
+  let given x = x <> None in
+  fits syn.url_enable_scheme (given url.url_scheme) &&
+  fits syn.url_enable_user (given url.url_user) &&
+  fits syn.url_enable_password (given url.url_password) &&
+  fits syn.url_enable_host (given url.url_host) &&
+  fits syn.url_enable_port (given url.url_port) &&
+  fits syn.url_enable_path (url.url_path <> []) &&
+  fits syn.url_enable_param (url.url_param <> []) &&
+  fits syn.url_enable_query (given url.url_query) &&
+  fits syn.url_enable_fragment (given url.url_fragment) &&
+  fits syn.url_enable_other (given url.url_other) &&
+  (* The user-supplied check is skipped for URLs known to be valid *)
+  (url.url_validity || syn.url_is_valid url)
+;;
+
+
+(* Returns the syntax the URL was created with *)
+let url_syntax_of_url u =
+  u.url_syntax
+;;
+
+
+(* modify_url ?syntax ?encoded ?scheme ... ?other url:
+ * Returns a new URL in which every part passed as optional argument
+ * replaces the corresponding part of 'url'; parts that are not passed
+ * are taken over from 'url'. 'syntax' replaces the URL syntax.
+ * Unless 'encoded' is true, the arguments user, password, path, param,
+ * query, fragment and other are passed through Netencoding.Url.encode
+ * before they are stored; scheme, host and port are stored as given.
+ * The new URL is checked against the (new) syntax, and every part is
+ * checked for illegal characters. Malformed_URL is raised if any check
+ * fails.
+ *)
+let modify_url
+ ?syntax
+ ?(encoded = false)
+ ?scheme
+ ?user
+ ?password
+ ?host
+ ?port
+ ?path
+ ?param
+ ?query
+ ?fragment
+ ?other
+ url
+ =
+
+ let encode = Netencoding.Url.encode in
+ (* enc / enc_list: encode an optional string / a string list unless the
+ * caller states that the arguments are already encoded *)
+ let enc x =
+ if encoded then
+ x
+ else
+ match x with
+ None -> None
+ | Some x' -> Some (encode x')
+ in
+ let enc_list l =
+ if encoded then
+ l
+ else
+ List.map encode l
+ in
+
+ let new_syntax =
+ match syntax with
+ None -> url.url_syntax
+ | Some syn -> syn
+ in
+
+ (* check_string s_opt cats: if the part is present, the whole string
+ * must consist of characters accepted by 'cats' *)
+ let check_string s_opt cats =
+ match s_opt with
+ None -> ()
+ | Some s ->
+ let l = String.length s in
+ let k = scan_url_part s 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL
+ in
+
+ (* check_string_list p cats sep: like check_string for every component
+ * of the list; in addition, no component may contain the character
+ * 'sep' *)
+ let check_string_list p cats sep =
+ List.iter
+ (fun p_component ->
+ let l = String.length p_component in
+ let k =
+ scan_url_part p_component 0 l cats new_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ if k <> l then raise Malformed_URL;
+ if String.contains p_component sep then raise Malformed_URL;
+ )
+ p
+ in
+
+ (* Create the modified record: *)
+ let url' =
+ {
+ url_syntax = new_syntax;
+ url_validity = false;
+ url_scheme = if scheme = None then url.url_scheme else scheme;
+ url_user = if user = None then url.url_user else enc user;
+ url_password = if password = None then url.url_password else enc password;
+ url_host = if host = None then url.url_host else host;
+ url_port = if port = None then url.url_port else port;
+ url_path = (match path with
+ None -> url.url_path
+ | Some p -> enc_list p);
+ url_param = (match param with
+ None -> url.url_param
+ | Some p -> enc_list p);
+ url_query = if query = None then url.url_query else enc query;
+ url_fragment = if fragment = None then url.url_fragment else enc fragment;
+ url_other = if other = None then url.url_other else enc other;
+ }
+ in
+ (* Check whether the URL conforms to the syntax:
+ *)
+ if not (url_conforms_to_syntax url') then raise Malformed_URL;
+ (* Dependencies between the parts: a password needs a user; user and
+ * port need a host *)
+ if url'.url_password <> None && url'.url_user = None then raise Malformed_URL;
+ if url'.url_user <> None && url'.url_host = None then raise Malformed_URL;
+ if url'.url_port <> None && url'.url_host = None then raise Malformed_URL;
+ (* Check every part: *)
+ check_string url'.url_scheme scheme_cats;
+ check_string url'.url_user login_cats;
+ check_string url'.url_password login_cats;
+ check_string url'.url_host host_cats;
+ (match url'.url_port with
+ None -> ()
+ | Some p -> if p < 0 || p > 65535 then raise Malformed_URL
+ );
+ let path_cats = path_cats_from_syntax new_syntax [] in
+ let other_cats = other_cats_from_syntax new_syntax in
+ check_string url'.url_query path_cats;
+ check_string url'.url_fragment path_cats;
+ check_string url'.url_other other_cats;
+ (* Check the lists: *)
+ check_string_list url'.url_param path_cats ';';
+ check_string_list url'.url_path path_cats '/';
+ (* Further path checks: *)
+ begin match url'.url_path with
+ [] ->
+ (* The path is empty: There must not be a 'param' or 'query'
+ * (this is only enforced if a host is present) *)
+ if url'.url_host <> None then begin
+ if url'.url_param <> [] then raise Malformed_URL;
+ if url'.url_query <> None then raise Malformed_URL;
+ end
+ | ["";""] ->
+ (* This is illegal. *)
+ raise Malformed_URL;
+ | "" :: p' ->
+ (* The path is absolute: always ok *)
+ ()
+ | _ ->
+ (* The path is relative: there must not be a host *)
+ if url'.url_host <> None then raise Malformed_URL;
+ end;
+ (* Apart from the first and the last one, no path component may be
+ * empty: *)
+ begin match url'.url_path with
+ _ :: rest -> (* "//" ambiguity *)
+ begin match List.rev rest with
+ _ :: rest' ->
+ if List.exists (fun p -> p = "") rest' then
+ raise Malformed_URL;
+ | [] ->
+ ()
+ end
+ | [] ->
+ ()
+ end;
+ (* Cache that the URL is valid: *)
+ url'.url_validity <- true;
+
+ url'
+;;
+
+
+(* The URL without any part; it uses null_url_syntax and is marked as
+ * valid *)
+let null_url =
+  { url_syntax = null_url_syntax;
+    url_validity = true;
+    (* optional string parts: *)
+    url_scheme = None;
+    url_user = None;
+    url_password = None;
+    url_host = None;
+    url_query = None;
+    url_fragment = None;
+    url_other = None;
+    (* the port: *)
+    url_port = None;
+    (* list parts: *)
+    url_path = [];
+    url_param = [];
+  }
+;;
+
+
+let make_url
+    ?(encoded = false)
+    ?scheme ?user ?password ?host ?port
+    ?path ?param ?query ?fragment ?other
+    url_syntax =
+  (* Builds a fresh URL for 'url_syntax' by applying all passed components
+   * to 'null_url'. An invalid syntax description is rejected with
+   * Invalid_argument before any component is looked at; all other checks
+   * are left to 'modify_url'.
+   *)
+  if url_syntax_is_valid url_syntax then
+    modify_url
+      ~encoded
+      ~syntax:url_syntax
+      ?scheme ?user ?password ?host ?port
+      ?path ?param ?query ?fragment ?other
+      null_url
+  else
+    invalid_arg "Neturl.make_url"
+;;
+
+
+let remove_from_url
+    ?(scheme = false) ?(user = false) ?(password = false)
+    ?(host = false) ?(port = false)
+    ?(path = false) ?(param = false)
+    ?(query = false) ?(fragment = false) ?(other = false)
+    url =
+  (* Rebuilds 'url' with 'make_url', leaving out every component whose
+   * flag is 'true'. The stored components are already encoded, hence
+   * ~encoded:true.
+   *)
+  (* 'unless dropped c' is None when the component is to be dropped *)
+  let unless dropped component =
+    if dropped then None else component
+  in
+  make_url
+    ~encoded:true
+    ?scheme:(unless scheme url.url_scheme)
+    ?user:(unless user url.url_user)
+    ?password:(unless password url.url_password)
+    ?host:(unless host url.url_host)
+    ?port:(unless port url.url_port)
+    ?path:(unless path (Some url.url_path))
+    ?param:(unless param (Some url.url_param))
+    ?query:(unless query url.url_query)
+    ?fragment:(unless fragment url.url_fragment)
+    ?other:(unless other url.url_other)
+    url.url_syntax
+;;
+
+
+let default_url
+    ?(encoded = false)
+    ?scheme ?user ?password ?host ?port
+    ?(path = []) ?(param = [])
+    ?query ?fragment ?other
+    url =
+  (* Fills in the components that 'url' lacks, taking them from the
+   * optional arguments; components present in 'url' always win.
+   * Unless ~encoded:true is given, the passed user, password, path,
+   * param, query, fragment and other values are %-encoded first.
+   * Scheme, host and port are taken as they are.
+   *)
+  let encode_one s = Netencoding.Url.encode s in
+  let enc = function
+    | Some v when not encoded -> Some (encode_one v)
+    | unchanged -> unchanged
+  in
+  let enc_list l =
+    if encoded then l else List.map encode_one l
+  in
+  (* 'current' if it is defined, otherwise the fallback *)
+  let or_else current fallback =
+    match current with
+    | Some _ -> current
+    | None -> fallback
+  in
+  make_url
+    ~encoded:true
+    ?scheme:(or_else url.url_scheme scheme)
+    ?user:(or_else url.url_user (enc user))
+    ?password:(or_else url.url_password (enc password))
+    ?host:(or_else url.url_host host)
+    ?port:(or_else url.url_port port)
+    ~path:(match url.url_path with
+           | [] -> enc_list path
+           | present -> present)
+    ~param:(match url.url_param with
+            | [] -> enc_list param
+            | present -> present)
+    ?query:(or_else url.url_query (enc query))
+    ?fragment:(or_else url.url_fragment (enc fragment))
+    ?other:(or_else url.url_other (enc other))
+    url.url_syntax
+;;
+
+
+let undefault_url
+    ?scheme ?user ?password ?host ?port
+    ?path ?param ?query ?fragment ?other
+    url =
+  (* Rebuilds 'url' and leaves out every component that is equal to the
+   * value passed for it. The passed values are compared with the stored
+   * (encoded) components without any conversion.
+   *)
+  (* Optional component: dropped only if both sides are defined and equal *)
+  let strip current default =
+    match current, default with
+    | Some have, Some dflt when have = dflt -> None
+    | _ -> current
+  in
+  (* List component: replaced by [] if equal to the passed default *)
+  let strip_list current default =
+    match default with
+    | Some dflt when dflt = current -> []
+    | _ -> current
+  in
+  make_url
+    ~encoded:true
+    ?scheme:(strip url.url_scheme scheme)
+    ?user:(strip url.url_user user)
+    ?password:(strip url.url_password password)
+    ?host:(strip url.url_host host)
+    ?port:(strip url.url_port port)
+    ~path:(strip_list url.url_path path)
+    ~param:(strip_list url.url_param param)
+    ?query:(strip url.url_query query)
+    ?fragment:(strip url.url_fragment fragment)
+    ?other:(strip url.url_other other)
+    url.url_syntax
+;;
+
+
+let url_provides
+    ?(scheme = false) ?(user = false) ?(password = false)
+    ?(host = false) ?(port = false)
+    ?(path = false) ?(param = false)
+    ?(query = false) ?(fragment = false) ?(other = false)
+    url =
+  (* For every flag that is set, the corresponding component must be
+   * present in 'url'; flags that are not set impose no condition.
+   *)
+  let given component = component <> None in
+  let nonempty components = components <> [] in
+  List.for_all
+    (fun (wanted, present) -> wanted => present)
+    [ scheme, given url.url_scheme;
+      user, given url.url_user;
+      password, given url.url_password;
+      host, given url.url_host;
+      port, given url.url_port;
+      path, nonempty url.url_path;
+      param, nonempty url.url_param;
+      query, given url.url_query;
+      fragment, given url.url_fragment;
+      other, given url.url_other;
+    ]
+;;
+
+
+(* Unwraps an optional component; a missing one is reported as Not_found. *)
+let return_if = function
+  | Some x -> x
+  | None -> raise Not_found
+;;
+
+
+(* Returns the optional component 'value', %-decoded unless the caller
+ * wants the encoded form. Raises Not_found if the component is missing.
+ *)
+let decode_if want_encoded value =
+  match value with
+  | None -> raise Not_found
+  | Some stored ->
+      if want_encoded then stored
+      else Netencoding.Url.decode stored (* WARNING: not thread-safe! *)
+;;
+
+
+(* List counterpart of 'decode_if': every element is %-decoded unless the
+ * encoded form is wanted. An empty list is returned as it is.
+ *)
+let decode_path_if want_encoded value =
+  match want_encoded with
+  | true -> value
+  | false ->
+      (* WARNING: not thread-safe! *)
+      List.map (fun segment -> Netencoding.Url.decode segment) value
+;;
+
+
+(* Accessors. The single-valued ones raise Not_found if the component is
+ * missing; scheme, host and port are returned as stored, the others are
+ * %-decoded unless ~encoded:true is passed.
+ *)
+let url_scheme { url_scheme = stored } = return_if stored;;
+let url_user ?(encoded=false) { url_user = stored } =
+  decode_if encoded stored;;
+let url_password ?(encoded=false) { url_password = stored } =
+  decode_if encoded stored;;
+let url_host { url_host = stored } = return_if stored;;
+let url_port { url_port = stored } = return_if stored;;
+let url_path ?(encoded=false) { url_path = stored } =
+  decode_path_if encoded stored;;
+let url_param ?(encoded=false) { url_param = stored } =
+  decode_path_if encoded stored;;
+let url_query ?(encoded=false) { url_query = stored } =
+  decode_if encoded stored;;
+let url_fragment ?(encoded=false) { url_fragment = stored } =
+  decode_if encoded stored;;
+let url_other ?(encoded=false) { url_other = stored } =
+  decode_if encoded stored;;
+
+
+let string_of_url url =
+  (* Prints the URL in the order
+   *   scheme ":" "//" login host ":" port path other ";" params "?" query "#" fragment
+   * where every missing component contributes the empty string. User,
+   * password and port are only printed if there is a host. Fails if the
+   * URL is not flagged as valid.
+   *)
+  if not url.url_validity then
+    failwith "Neturl.string_of_url: URL not flagged as valid";
+  (* Optional component with a leading separator *)
+  let prefixed sep = function
+    | None -> ""
+    | Some v -> sep ^ v
+  in
+  let scheme_part =
+    match url.url_scheme with
+    | None -> ""
+    | Some s -> s ^ ":"
+  in
+  let host_part =
+    match url.url_host with
+    | None -> ""
+    | Some host ->
+        let login =
+          match url.url_user with
+          | None -> ""
+          | Some user -> user ^ prefixed ":" url.url_password ^ "@"
+        in
+        let port =
+          match url.url_port with
+          | None -> ""
+          | Some p -> ":" ^ string_of_int p
+        in
+        "//" ^ login ^ host ^ port
+  in
+  let path_prefix =
+    match url.url_path with
+    | [ "" ] ->
+        (* The list [ "" ] stands for the path "/" *)
+        "/"
+    | first :: _ when url.url_scheme = None &&
+                      url.url_host = None &&
+                      String.contains first ':' ->
+        (* Without scheme and host, a colon in the first segment could
+         * be taken for the end of a scheme name. Prepending "./" avoids
+         * this (as recommended in RFC 1808, 5.3).
+         *)
+        "./"
+    | _ ->
+        ""
+  in
+  let params = List.map (fun p -> ";" ^ p) url.url_param in
+  String.concat ""
+    [ scheme_part;
+      host_part;
+      path_prefix;
+      String.concat "/" url.url_path;
+      prefixed "" url.url_other;
+      String.concat "" params;
+      prefixed "?" url.url_query;
+      prefixed "#" url.url_fragment;
+    ]
+;;
+
+
+let url_of_string url_syntax s = (* Parses the string 's' according to
+ * 'url_syntax' and returns the URL built by 'make_url'.
+ * The string is consumed from left to right in the stages scheme, host
+ * part (user, password, host, port), path, other, param, query, and
+ * fragment. Every stage yields its component(s) and the position where
+ * the next stage continues (k1 to k7). A stage is skipped if the syntax
+ * does not recognize the part or if the expected introducing character
+ * is not found at the current position.
+ * Raises Malformed_URL if scanning fails or if the string is not consumed
+ * completely; 'make_url' is called with ~encoded:true, i.e. the parsed
+ * components are taken as already encoded.
+ *)
+ let l = String.length s in
+ let recognized x = x <> Url_part_not_recognized in (* part is known to the syntax *)
+
+ let rec collect_words terminators eof_char cats k =
+ (* Collect words as recognized by 'cats', starting at position 'k' in
+ * 's'. Collection stops if one of the characters listed in 'terminators'
+ * is found. If the end of the string is reached, it is treated as
+ * 'eof_char'.
+ * The result is the list of (word, separator) pairs, where the separator
+ * is the character following the word, and the position where the last
+ * word ends (this is 'l' at the end of the string).
+ *)
+ let k' = scan_url_part s k l cats url_syntax.url_accepts_8bits in
+ (* or raise Malformed_URL *)
+ let word, sep =
+ String.sub s k (k'-k), (if k'<l then s.[k'] else eof_char) in
+ if List.mem sep terminators then
+ [word, sep], k'
+ else
+ let word_sep_list', k'' =
+ collect_words terminators eof_char cats (k'+1) in
+ ((word, sep) :: word_sep_list'), k''
+ in
+
+ (* Try to extract the scheme name: *)
+ let scheme, k1 =
+ if recognized url_syntax.url_enable_scheme then
+ try
+ let k = scan_url_part s 0 l scheme_cats false in
+ (* or raise Malformed_URL *)
+ if k = l then raise Malformed_URL; (* nothing follows the word: no scheme *)
+ assert (s.[k] = ':');
+ Some (String.sub s 0 k), (k+1)
+ with
+ Malformed_URL -> None, 0 (* no scheme: restart at position 0 *)
+ else
+ None, 0
+ in
+
+ (* If there is a "//", a host will follow: *)
+ let host, port, user, password, k2 =
+ if recognized url_syntax.url_enable_host &&
+ k1 + 2 <= l && s.[k1]='/' && s.[k1+1]='/' then begin
+
+ let word_sep_list, k' = collect_words [ '/'; '#' ] '/' login_cats (k1+2)
+ in
+ (* or raise Malformed_URL *)
+
+ let int x = (* NOTE(review): the handler catches every exception *)
+ try int_of_string x with _ -> raise Malformed_URL in
+
+ (* The sequence of separators tells which login components are given: *)
+ match word_sep_list with
+ [ host, ('/'|'#') ] ->
+ Some host, None, None, None, k'
+ | [ host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), None, None, k'
+ | [ user, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, None, k'
+ | [ user, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, None, k'
+ | [ user, ':'; password, '@'; host, ('/'|'#') ] ->
+ Some host, None, Some user, Some password, k'
+ | [ user, ':'; password, '@'; host, ':'; port, ('/'|'#') ] ->
+ Some host, Some (int port), Some user, Some password, k'
+ | _ ->
+ raise Malformed_URL
+ end
+ else
+ None, None, None, None, k1
+ in
+
+ let path, k3 =
+ if recognized url_syntax.url_enable_path &&
+ k2 < l (* && s.[k2]='/' *)
+ then begin
+ let cats = path_cats_from_syntax url_syntax [ '/' ] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats k2 in
+ (* or raise Malformed_URL *)
+ match word_sep_list with
+ [ "", '/'; "", _ ] -> (* only a slash: the path is [ "" ] *)
+ [ "" ], k'
+ | [ "", _ ] -> (* nothing before the separator: no path *)
+ [], k'
+ | _ ->
+ List.map fst word_sep_list, k'
+ end
+ else begin
+ (* If there is a single '/': skip it *)
+ if not (recognized url_syntax.url_enable_other) &&
+ k2 < l && s.[k2]='/'
+ then
+ [], (k2+1)
+ else
+ [], k2
+ end
+ in
+
+ let other, k4 =
+ if recognized url_syntax.url_enable_other &&
+ k3 < l
+ then begin
+
+ let cats = other_cats_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ['>';'#'] '>' cats k3 in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ other, _ ] -> Some other, k'
+ | _ -> assert false
+ end
+ else
+ None, k3
+ in
+
+ let param, k5 =
+ if recognized url_syntax.url_enable_param &&
+ k4 < l && s.[k4]=';'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+ let seps' = List.filter (fun c -> c <> ';') seps in (* ';' separates the params *)
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps') '>' cats (k4+1) in
+ (* or raise Malformed_URL *)
+
+ List.map fst word_sep_list, k'
+ end
+ else
+ [], k4
+ in
+
+ let query, k6 =
+ if recognized url_syntax.url_enable_query &&
+ k5 < l && s.[k5]='?'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k5+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ query, _ ] -> Some query, k'
+ | _ -> assert false
+ end
+ else
+ None, k5
+ in
+
+ let fragment, k7 =
+ if recognized url_syntax.url_enable_fragment &&
+ k6 < l && s.[k6]='#'
+ then begin
+ let cats = path_cats_from_syntax url_syntax [] in
+ let seps = separators_from_syntax url_syntax in
+
+ (* Note: '>' is not allowed within URLs; because of this we can use
+ * it as end-of-string character.
+ *)
+
+ let word_sep_list, k' = collect_words ('>'::seps) '>' cats (k6+1) in
+ (* or raise Malformed_URL *)
+
+ match word_sep_list with
+ [ fragment, _ ] -> Some fragment, k'
+ | _ -> assert false
+ end
+ else
+ None, k6
+ in
+
+ if k7 <> l then raise Malformed_URL; (* unparsed rest of the string *)
+
+ make_url
+ ~encoded:true
+ ?scheme:scheme
+ ?user:user
+ ?password:password
+ ?host:host
+ ?port:port
+ ~path:path
+ ~param:param
+ ?query:query
+ ?fragment:fragment
+ ?other:other
+ url_syntax
+;;
+
+
+let split_path s =
+  (* Cuts 's' at every '/' into the list of path components. The string
+   * is walked from right to left so that the list is built in order.
+   * The empty string yields [], and the single slash yields [ "" ].
+   *)
+  let n = String.length s in
+  (* 'stop' is the position just behind the component being collected,
+   * 'i' the position currently inspected.
+   *)
+  let rec scan stop i acc =
+    if i < 0 then
+      String.sub s 0 stop :: acc
+    else if s.[i] = '/' then
+      scan i (i - 1) (String.sub s (i + 1) (stop - i - 1) :: acc)
+    else
+      scan stop (i - 1) acc
+  in
+  match scan n (n - 1) [] with
+  | [ "" ] -> []
+  | [ ""; "" ] -> [ "" ]
+  | words -> words
+;;
+
+
+(* Concatenates path components with '/'. The list [ "" ] is the special
+ * representation of the path consisting only of a slash.
+ *)
+let join_path l =
+  if l = [ "" ] then "/" else String.concat "/" l;;
+
+
+let norm_path l =
+  (* Normalizes a path given as list of components:
+   *  1. empty components in the middle ("//") are removed,
+   *  2. "." components are removed,
+   *  3. every "x/.." pair is removed (unless x is "" or "..").
+   * The steps are applied in this order. A path that is reduced to
+   * nothing becomes []; [".."] becomes [".."; ""]; and the result
+   * ["";""] is replaced by [""], the representation of the single slash.
+   *)
+
+  (* 'first' is true while the head of 'l' is the first component of the
+   * whole path; a leading "" (absolute path) must be kept.
+   *)
+  let rec remove_slash_slash l first =
+    match l with
+    | [ "" ] ->
+        [ "" ]
+    | [ ""; "" ] when first ->
+        [ "" ]
+    | "" :: l' when not first ->
+        remove_slash_slash l' false
+    | x :: l' ->
+        x :: remove_slash_slash l' false
+    | [] ->
+        []
+  in
+
+  (* 'first' is true as long as no component has been output. It must
+   * survive the removal of a leading "."; otherwise a path consisting
+   * only of dots (such as "./.") would end up as [ "" ], which is the
+   * absolute path "/", instead of the empty relative path.
+   *)
+  let rec remove_dot l first =
+    match l with
+    | ([ "." ] | [ "."; "" ]) ->
+        if first then [] else [ "" ]
+    | "." :: x :: l' ->
+        remove_dot (x :: l') first
+    | x :: l' ->
+        x :: remove_dot l' false
+    | [] ->
+        []
+  in
+
+  (* Removes the leftmost "x/.." pair; raises Not_found if there is none.
+   * If the pair ends the path and something precedes it, a trailing ""
+   * keeps the final slash.
+   *)
+  let rec remove_dot_dot_once l first =
+    match l with
+    | x :: ".." :: [] when x <> "" && x <> ".." && not first ->
+        [ "" ]
+    | x :: ".." :: l' when x <> "" && x <> ".." ->
+        l'
+    | x :: l' ->
+        x :: remove_dot_dot_once l' false
+    | [] ->
+        raise Not_found
+  in
+
+  (* Repeats the single removal until no pair is left *)
+  let rec remove_dot_dot l =
+    try
+      let l' = remove_dot_dot_once l true in
+      remove_dot_dot l'
+    with
+      Not_found -> l
+  in
+
+  let l' = remove_dot_dot (remove_dot (remove_slash_slash l true) true) in
+  match l' with
+  | [ ".." ] -> [ ".."; "" ]
+  | [ ""; "" ] -> [ "" ]
+  | _ -> l'
+;;
+
+
+let apply_relative_url baseurl relurl =
+  (* Interprets 'relurl' relative to 'baseurl' (RFC 1808). The result
+   * always has the syntax of 'baseurl'. Depending on what 'relurl'
+   * provides, more and more is inherited from 'baseurl':
+   *  - scheme present: nothing but the syntax
+   *  - host present: the scheme
+   *  - absolute path: scheme and login information
+   *  - empty path: additionally the path and, as far as 'relurl' does not
+   *    define them, params, query, and fragment
+   *  - relative path: scheme and login information; the relative path
+   *    replaces the last component of the base path, and the result is
+   *    normalized with 'norm_path'
+   * Fails if one of the URLs is not flagged as valid; 'modify_url' may
+   * raise Malformed_URL if the result violates the syntax.
+   *)
+  if not baseurl.url_validity || not relurl.url_validity then
+    failwith "Neturl.apply_relative_url: URL not flagged as valid";
+
+  if relurl.url_scheme <> None then
+    modify_url
+      ~syntax:baseurl.url_syntax           (* inherit syntax *)
+      relurl
+  else if relurl.url_host <> None then
+    modify_url
+      ~syntax:baseurl.url_syntax           (* inherit syntax and scheme *)
+      ?scheme:baseurl.url_scheme
+      relurl
+  else
+    match relurl.url_path with
+    | "" :: _ ->
+        (* An absolute path *)
+        modify_url
+          ~syntax:baseurl.url_syntax       (* inherit syntax, scheme, and *)
+          ~encoded:true
+          ?scheme:baseurl.url_scheme       (* login info *)
+          ?host:baseurl.url_host
+          ?port:baseurl.url_port
+          ?user:baseurl.url_user
+          ?password:baseurl.url_password
+          relurl
+    | [] ->
+        (* Empty: Inherit also path, params, query, and fragment *)
+        let new_params, new_query, new_fragment =
+          match relurl.url_param, relurl.url_query, relurl.url_fragment
+          with
+          | [], None, None ->
+              (* Inherit all three *)
+              baseurl.url_param, baseurl.url_query, baseurl.url_fragment
+          | [], None, f ->
+              (* Inherit params and query *)
+              baseurl.url_param, baseurl.url_query, f
+          | [], q, f ->
+              (* Inherit params *)
+              baseurl.url_param, q, f
+          | p, q, f ->
+              (* Inherit none of them *)
+              p, q, f
+        in
+        modify_url
+          ~syntax:baseurl.url_syntax
+          ~encoded:true
+          ?scheme:baseurl.url_scheme
+          ?host:baseurl.url_host
+          ?port:baseurl.url_port
+          ?user:baseurl.url_user
+          ?password:baseurl.url_password
+          ~path:baseurl.url_path
+          ~param:new_params
+          ?query:new_query
+          ?fragment:new_fragment
+          relurl
+    | relpath ->
+        (* A relative path *)
+        (* Replaces the last component of 'basepath' by 'relpath' *)
+        let rec replace_last basepath =
+          match basepath with
+          | [] | [ _ ] ->
+              relpath
+          | x :: basepath' ->
+              x :: replace_last basepath'
+        in
+        let merged_path =
+          match baseurl.url_path with
+          | [ "" ] ->
+              (* The base path is "/", which is represented as [ "" ] and
+               * not as [ ""; "" ]. The result must remain absolute, so
+               * the leading "" is kept.
+               *)
+              "" :: relpath
+          | [] when baseurl.url_host <> None ->
+              (* A host without path: the merged path must be absolute
+               * because URLs with host and relative path are illegal.
+               *)
+              "" :: relpath
+          | basepath ->
+              replace_last basepath
+        in
+        let new_path = norm_path merged_path in
+        modify_url
+          ~syntax:baseurl.url_syntax       (* inherit syntax, scheme, and *)
+          ~encoded:true
+          ?scheme:baseurl.url_scheme       (* login info *)
+          ?host:baseurl.url_host
+          ?port:baseurl.url_port
+          ?user:baseurl.url_user
+          ?password:baseurl.url_password
+          ~path:new_path                   (* and change path *)
+          relurl
+
+;;
+
+
+(* Toploop printer: writes the URL in the form <URL:...> using Format. *)
+let print_url url =
+  let text = String.concat "" [ "<URL:"; string_of_url url; ">" ] in
+  Format.print_string text
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:28 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/07/04 21:50:51 gerd
+ * Fixed typo.
+ *
+ * Revision 1.3 2000/06/26 22:57:49 gerd
+ * Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2 2000/06/25 19:39:48 gerd
+ * Lots of Bugfixes.
+ *
+ * Revision 1.1 2000/06/24 20:19:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This module applies already O'Caml-3 features. *)
+
+(* Uniform Resource Locators (URLs):
+ *
+ * This module provides functions to parse URLs, to print URLs, to
+ * store URLs, to modify URLs, and to apply relative URLs.
+ *
+ * URLs are strings formed according to pattern (1) or (2):
+ *
+ * (1) scheme://user:password@host:port/path;params?query#fragment
+ * (2) scheme:other;params?query#fragment
+ *
+ * The word at the beginning of the URL identifies the URL scheme
+ * (such as "http" or "file"). Depending on the scheme, not all of the
+ * parts are allowed, or parts may be omitted. This module defines the
+ * type 'url_syntax' whose values describe which parts are allowed/required/
+ * not allowed for a concrete URL scheme (see below).
+ *
+ * Not all characters are allowed in a URL. Some characters are allowed,
+ * but have the special task to separate the various parts of the URL
+ * (reserved characters).
+ * However, it is possible to include even invalid or reserved characters
+ * as normal content by applying the '%'-encoding on these characters:
+ * A '%' indicates that an encoded character follows, and the character
+ * is denoted by a two-digit hexadecimal number (e.g. %2f for '/').
+ * In the following descriptions, the term "encoded string" means a string
+ * containing such %-encoded characters, and the "decoded string" means a
+ * string not containing such characters.
+ * See the module Netencoding.Url for functions encoding or decoding
+ * strings.
+ *
+ * The type 'url' describes values storing the components of a URL,
+ * and the 'url_syntax' for the URL. In general, the components are
+ * stored as encoded strings; however, not for all components the
+ * '%'-encoding is applicable.
+ * For convenience, the functions creating, modifying, and accessing
+ * URLs can handle both encoded and decoded strings. In order to
+ * avoid errors, the functions pass strings even in their decoded form.
+ *
+ * Note that there is currently no function to compare URLs. The
+ * canonical comparison ( = ) is not applicable because the same URL
+ * may be written differently.
+ *
+ * Note that nothing is said about the character set/encoding of URLs.
+ * Some protocols and standards prefer UTF-8 as fundamental encoding
+ * and apply the '%'-encoding on top of it; i.e. the byte sequence
+ * representing a character in UTF-8 is '%'-encoded. There is no special
+ * support for this technique.
+ *
+ * For more information about URLs, see RFCs 1738 and 1808.
+ *)
+
+exception Malformed_URL
+(* Is raised by a number of functions when encountering a badly formed
+ * URL.
+ *)
+
+val extract_url_scheme : string -> string
+ (* Returns the URL scheme from the string representation of an URL.
+ * E.g. extract_url_scheme "http://host/path" = "http".
+ * The scheme name is always converted to lowercase characters.
+ * Raises Malformed_URL if the scheme name is not found.
+ *)
+
+type url_syntax_option = (* Status of one URL component in a 'url_syntax': *)
+ Url_part_not_recognized (* the component is not recognized at all *)
+ | Url_part_allowed (* the component is recognized, and optional *)
+ | Url_part_required (* the component is recognized, and must be present *)
+
+
+type url_syntax =
+ { url_enable_scheme : url_syntax_option;
+ url_enable_user : url_syntax_option;
+ url_enable_password : url_syntax_option;
+ url_enable_host : url_syntax_option;
+ url_enable_port : url_syntax_option;
+ url_enable_path : url_syntax_option;
+ url_enable_param : url_syntax_option;
+ url_enable_query : url_syntax_option;
+ url_enable_fragment : url_syntax_option;
+ url_enable_other : url_syntax_option;
+ url_accepts_8bits : bool;
+ url_is_valid : url -> bool;
+ }
+
+and url
+;;
+
+(* Values of type 'url_syntax' describe which components of an URL are
+ * recognized, which are allowed (and optional), and which are required.
+ * Not all combinations are valid; the predicate expressed by the
+ * function 'url_syntax_is_valid' must hold.
+ * The function 'url_is_valid' is applied when a fresh URL is created
+ * and must return 'true'. This function allows it to add an arbitrary
+ * validity criterion to 'url_syntax'. (Note that the URL passed to
+ * this function is not fully working; you can safely assume that the
+ * accessor functions url_scheme etc. can be applied to it.)
+ *
+ * Switch 'url_accepts_8bits': If 'true', the bytes with code 128 to
+ * 255 are treated like alphanumeric characters; if 'false' these bytes
+ * are illegal (but it is still possible to include such bytes in their
+ * encoded form: %80 to %FF).
+ *
+ * Values of type 'url' describe concrete URLs. Every URL must have
+ * a fundamental 'url_syntax', and it is only possible to create URLs
+ * conforming to the syntax. See 'make_url' for further information.
+ *)
+
+
+val url_syntax_is_valid : url_syntax -> bool
+ (* Checks whether the passed url_syntax is valid. This means:
+ *
+ * - If passwords are recognized, users (and hosts) must be recognized, too
+ * - If ports are recognized, hosts must be recognized, too
+ * - If users are recognized, hosts must be recognized, too
+ * - Either the syntax recognizes one of the phrases
+ * { user, password, host, port, path }, or the syntax recognizes
+ * the phrase 'other'.
+ *)
+
+
+val partial_url_syntax : url_syntax -> url_syntax
+ (* Transforms the syntax into another syntax where all required parts are
+ * changed into optional parts.
+ *)
+
+
+(* Note that all following url_syntaxes do not allow 8bit bytes. *)
+
+val null_url_syntax : url_syntax
+
+val ip_url_syntax : url_syntax
+ (* Maximum syntax for IP based protocols *)
+
+val common_url_syntax : (string, url_syntax) Hashtbl.t
+ (* Syntax descriptions for common URL schemes:
+ *
+ * null_url_syntax: nothing is recognized
+ *
+ * common_url_syntax: Hashtable mapping from URL scheme names to
+ * definitions of syntaxes:
+ *
+ * "file": scheme, host?, path
+ * "ftp": scheme, user?, password?, host, port?, path?, param?
+ * "http": scheme, user?, password?, host, port?, path?, query?
+ * "mailto": scheme, other
+ *
+ * Notes:
+ * (1) These syntax descriptions can be weakened for partial/relative URLs
+ * by changing the required parts to optional parts: See the function
+ * 'partial_url_syntax'.
+ * (2) None of the descriptions allows fragments. These can be enabled by
+ * setting 'url_enable_fragment' to Url_part_allowed. E.g.
+ * { file_url_syntax with url_enable_fragment = Url_part_allowed }
+ *)
+
+val null_url : url
+ (* A URL without any component and 'null_url_syntax'
+ *)
+
+val make_url :
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url_syntax ->
+ url
+ (* Creates a URL from components:
+ *
+ * - The components "scheme" and "host" are simple strings to which the
+ * '%'-encoding is not applicable.
+ * - The component "port" is a simple number. Of course, the '%'-encoding
+ * is not applicable, too.
+ * - The components "user", "password", "query", "fragment", and "other"
+ * are strings which may contain '%'-encoded characters. By default,
+ * you can pass any string for these components, and problematic characters
+ * are automatically encoded. If you set ~encoded:true, the passed
+ * strings must already be encoded, but the function checks whether
+ * the encoding is correct.
+ * Note that for "query" even the characters '?' and '=' are encoded
+ * by default, so you need to set ~encoded:true to pass a reasonable
+ * query string.
+ * - The components "path" and "param" are lists of strings which may
+ * contain '%'-encoded characters. Again, the default is to pass
+ * decoded strings to the function, and the function encodes them
+ * automatically, and by setting ~encoded:true the caller is responsible
+ * for encoding the strings.
+ * path = [] and params = [] mean that no path and no parameters are
+ * specified, respectively.
+ * See below for the representation of these components.
+ *
+ * Except of "path", the strings representing the components do not
+ * contain the characters separating the components from each other.
+ * The "path" component includes the '/' at the beginning of the path
+ * (if present).
+ *
+ * The created URL must conform to the 'url_syntax', i.e.
+ * - The URL must only contain components which are recognized by the
+ * syntax
+ * - The URL must contain components which are required by the syntax
+ * - The URL must fulfill the predicate expressed by the 'url_is_valid'
+ * function of the syntax.
+ *
+ * The path of a URL is represented as a list of '/'-separated path
+ * components. i.e.
+ * [ s1; s2; ...; sN ] represents the path
+ * s1 ^ "/" ^ s2 ^ "/" ^ ... ^ "/" ^ sN
+ * As special cases:
+ * [] is the non-existing path
+ * [ "" ] is "/"
+ * [ "";"" ] is illegal
+ *
+ * Except of s1 and sN, the path components must not be empty strings.
+ *
+ * To avoid ambiguities, it is illegal to create URLs with both relative
+ * paths (s1 <> "") and host components.
+ *
+ * Parameters of URLs are components beginning with ';'. The list
+ * of parameters is represented as list of strings where the strings
+ * contain the value following ';'.
+ *)
+
+val modify_url :
+ ?syntax:url_syntax ->
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Modifies the passed components and returns the modified URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val remove_from_url :
+ ?scheme:bool ->
+ ?user:bool ->
+ ?password:bool ->
+ ?host:bool ->
+ ?port:bool ->
+ ?path:bool ->
+ ?param:bool ->
+ ?query:bool ->
+ ?fragment:bool ->
+ ?other:bool ->
+ url ->
+ url
+ (* Removes the components whose flag is 'true' from the URL, and returns
+ * the modified URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val default_url :
+ ?encoded:bool ->
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Adds missing components and returns the modified URL.
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val undefault_url :
+ ?scheme:string ->
+ ?user:string ->
+ ?password:string ->
+ ?host:string ->
+ ?port:int ->
+ ?path:string list ->
+ ?param:string list ->
+ ?query:string ->
+ ?fragment:string ->
+ ?other:string ->
+ url ->
+ url
+ (* Removes components from the URL if they have the passed value, and
+ * returns the modified URL.
+ * Note: The values must always be passed in _encoded_ form!
+ * The modified URL shares unmodified components with the original
+ * URL.
+ *)
+
+val url_syntax_of_url : url -> url_syntax
+ (* Returns the 'url_syntax' record of a URL. *)
+
+val url_of_string : url_syntax -> string -> url
+ (* Parses the passed string according to the passed url_syntax.
+ * Raises Malformed_URL if the string cannot be parsed completely, and
+ * Invalid_argument if the url_syntax is not valid (see
+ * url_syntax_is_valid).
+ *)
+
+val string_of_url : url -> string
+ (* Returns the URL as string. Raises Failure if the URL is not flagged
+ * as valid.
+ *)
+
+val url_provides :
+ ?scheme:bool ->
+ ?user:bool ->
+ ?password:bool ->
+ ?host:bool ->
+ ?port:bool ->
+ ?path:bool ->
+ ?param:bool ->
+ ?query:bool ->
+ ?fragment:bool ->
+ ?other:bool ->
+ url ->
+ bool
+ (* Returns 'true' iff the URL has all of the components passed with
+ * 'true' value.
+ *)
+
+val url_scheme : url -> string
+val url_user : ?encoded:bool -> url -> string
+val url_password : ?encoded:bool -> url -> string
+val url_host : url -> string
+val url_port : url -> int
+val url_path : ?encoded:bool -> url -> string list
+val url_param : ?encoded:bool -> url -> string list
+val url_query : ?encoded:bool -> url -> string
+val url_fragment : ?encoded:bool -> url -> string
+val url_other : ?encoded:bool -> url -> string
+ (* Return components of the URL. The functions return decoded strings
+ * unless ~encoded:true is set.
+ * If the component does not exist, the exception Not_found
+ * is raised.
+ *)
+
+val split_path : string -> string list
+ (* Splits a '/'-separated path into components (e.g. to set up the
+ * ~path argument of make_url).
+ * E.g. split_path "a/b/c" = [ "a"; "b"; "c" ],
+ * split_path "/a/b" = [ ""; "a"; "b" ],
+ * split_path "a/b/" = [ "a"; "b"; "" ]
+ *)
+
+val join_path : string list -> string
+ (* Concatenates the path components (reverse function of split_path).
+ *)
+
+val norm_path : string list -> string list
+ (* Removes "." and ".." from the path if possible. Deletes double slashes.
+ *
+ * EXAMPLES:
+ *
+ * norm_path ["."] = []
+ * means: "." = ""
+ * norm_path ["."; ""] = []
+ * means: "./" = ""
+ * norm_path ["a"; "."] = ["a"; ""]
+ * means: "a/." = "a/"
+ * norm_path ["a"; "b"; "."] = ["a"; "b"; ""]
+ * means: "a/b/." = "a/b/"
+ * norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]
+ * means: "a/./b/." = "a/b/"
+ * norm_path [".."] = [".."; ""]
+ * means: ".." = "../"
+ * norm_path [".."; ""] = [".."; ""]
+ * means: "../" = "../"
+ * norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]
+ * means: "a/b/../c" = "a/c"
+ * norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]
+ * means: "a/b/../c/" = "a/c/"
+ * norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]
+ * means: "//a//b" = "/a/b"
+ * norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]
+ * means: "a/b//../c/" = "a/c/"
+ * norm_path ["a"; ".."] = []
+ * means: "a/.." = ""
+ *)
+
+
+val apply_relative_url : url -> url -> url
+ (* apply_relative_url base rel:
+ * Interprets 'rel' relative to 'base' and returns the new URL. This
+ * function implements RFC 1808.
+ *)
+
+val print_url : url -> unit
+ (* Printer for the toploop. *)
+
+(* ---------------------------------------------------------------------- *)
+
+(* EXAMPLES:
+ *
+ * let http = Hashtbl.find common_url_syntax "http";;
+ * let u = url_of_string http "http://g:pw@host/a/%62/";;
+ * string_of_url u;;
+ * --> "http://g:pw@host/a/%62/"
+ * url_scheme u;;
+ * --> "http"
+ * url_user u;;
+ * --> "g"
+ * url_password u;;
+ * --> "pw"
+ * url_host u;;
+ * --> "host"
+ * url_path u;;
+ * --> [ ""; "a"; "b"; "" ] (* sic! *)
+ * url_path ~encoded:true u;;
+ * --> [ ""; "a"; "%62"; "" ]
+ * let v = make_url
+ * ~path:[ ".."; "c" ]
+ * ~fragment:"near-the-#-character"
+ * { (partial_url_syntax http) with url_enable_fragment = Url_part_allowed };;
+ * string_of_url v;;
+ * --> "../c#near-the-%23-character"
+ * let u' = modify_url ~syntax:(url_syntax_of_url v) u;;
+ * (* u does not permit fragments *)
+ * let w = apply_relative_url u' v;;
+ * string_of_url w;;
+ * --> "http://g:pw@host/a/c#near-the-%23-character"
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:27 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/26 22:57:49 gerd
+ * Change: The record 'url_syntax' has an additional component
+ * 'url_accepts_8bits'. Setting this option to 'true' causes that
+ * the bytes >= 0x80 are no longer rejected.
+ *
+ * Revision 1.2 2000/06/25 22:55:47 gerd
+ * Doc update.
+ *
+ * Revision 1.1 2000/06/24 20:19:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
--- /dev/null
+# Note: you need an appropriate toploop "ocamlfattop" to run the
+# tests.
+
+# 2nd note: "test_encoding.cgi" is a CGI script; you must invoke
+# it through browser and WWW server.
+
+test: test_recode
+ ocamlfattop test_netencoding.ml
+ ocamlfattop test_mimestring.ml
+ ocamlfattop test_cgi.ml
+ ocamlfattop test_neturl.ml
+ ./test_recode
+
+test_recode: test_recode.ml
+ ocamlc -custom -o test_recode unix.cma threads.cma str.cma \
+ ../netstring.cma ../netmappings_iso.cmo \
+ -I .. -thread test_recode.ml
+
+distclean: clean
+ rm -f *~ test_recode
+
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+CLEAN:
+
--- /dev/null
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Cgi;;
+
+(**********************************************************************)
+(* dest_form_encoded_parameters *)
+(**********************************************************************)
+
+let t001 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+(* Parses one form-data part that is surrounded by preamble and epilogue
+ * text and expects a single text/plain argument named blupp.
+ * NOTE(review): as shown here the input is character for character the
+ * same as in t001. t004 and t005 differ only in whether the expected
+ * value ends in CR LF or in LF, so presumably one of t001/t002 was meant
+ * to use CR LF line ends - confirm against the original file.
+ *)
+let t002 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t003 f =
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+(* A part whose value is followed by an empty line, sent with CR LF line
+ * ends. The test expects exactly one CR LF to remain at the end of the
+ * value. The line ends are written as explicit escapes so that they
+ * survive editors and version control systems that normalize line
+ * terminators; with LF-only input the expectation of t005 would apply
+ * instead.
+ *)
+let t004 f =
+  let r =
+    f
+      ("--snip\r\n" ^
+       "Content-Disposition: form-data; name=blupp\r\n" ^
+       "\r\n" ^
+       "This is a text\r\n" ^
+       "\r\n" ^
+       "--snip--")
+      "snip"
+  in
+  r = ["blupp", "text/plain", "This is a text\013\n"]
+;;
+
+
+let t005 f =
+ let r =
+ f
+ "--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+
+--snip--"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text\n"]
+;;
+
+
+let t006 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data;name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t007 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data;name= \"name=blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["name=blupp", "text/plain", "This is a text"]
+;;
+
+
+let t008 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; strange=\"name=blop\"; name= \"blupp\"
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+let t009 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; strange=\" name=blop \"; name=blupp
+
+This is a text
+--snip--
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text"]
+;;
+
+
+(* The Content-type header line carries a trailing space after
+ * octet-stream; the test expects the MIME type to come back without it.
+ * The input is assembled from one-line literals so that the trailing
+ * space is visible and cannot be stripped silently by an editor.
+ *)
+let t010 f =
+  let r =
+    f
+      ("--snip\n" ^
+       "Content-Disposition: form-data; name=blupp\n" ^
+       "Content-type: application/octet-stream \n" ^
+       "\n" ^
+       "This is a text\n" ^
+       "--snip--")
+      "snip"
+  in
+  r = ["blupp", "application/octet-stream", "This is a text"]
+;;
+
+
+let t011 f =
+ let r =
+ f
+ "blah blah
+--snip
+Content-Disposition: form-data; name=blupp
+
+This is a text
+--snip
+Content-Disposition: form-data; name=blipp
+
+Another line
+--snip-- blah
+blah blah"
+ "snip"
+ in
+ r = ["blupp", "text/plain", "This is a text";
+ "blipp", "text/plain", "Another line" ]
+;;
+
+
+let t012 f =
+ (* A real example *)
+ let r =
+ f
+"-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"line\"
+
+aaa
+-----------------------------10843891265508332411092264958
+Content-Disposition: form-data; name=\"submit\"
+
+Submit
+-----------------------------10843891265508332411092264958--
+"
+ "---------------------------10843891265508332411092264958"
+ in
+ r = [ "line", "text/plain", "aaa";
+ "submit", "text/plain", "Submit";
+ ]
+;;
+
+
+(**********************************************************************)
+(* encode/decode *)
+(**********************************************************************)
+
+let t100() =
+ let s = String.create 256 in
+ for i = 0 to 255 do s.[i] <- Char.chr i done;
+ let r = encode s in
+ r = ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F" ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF")
+;;
+
+
+let t101() =
+ let r = String.create 256 in
+ for i = 0 to 255 do r.[i] <- Char.chr i done;
+ let s = decode
+ ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F" ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ "%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF") in
+ r = s
+;;
+
+
+(* Checks that decode also accepts lower-case hexadecimal digits in %XX
+ * sequences: the result must equal the string of all 256 byte values in
+ * ascending order. The row containing the literal upper-case alphabet is
+ * kept outside String.lowercase so that those letters stay unchanged.
+ *)
+let t102() =
+ let r = String.create 256 in
+ for i = 0 to 255 do r.[i] <- Char.chr i done;
+ let s = decode
+ ((String.lowercase
+ ("%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F" ^
+ "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F" ^
+ "+!%22%23$%25%26'()*%2B,-.%2F" ^
+ "0123456789%3A%3B%3C%3D%3E%3F")) ^
+ "%40ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_" ^
+ (String.lowercase
+ ("%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D%7E%7F" ^
+ "%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F" ^
+ "%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F" ^
+ "%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF" ^
+ "%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF" ^
+ "%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF" ^
+ "%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF" ^
+ "%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF" ^
+ "%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF"))) in
+ r = s
+;;
+
+(**********************************************************************)
+(* dest_url_encoded_parameters *)
+(**********************************************************************)
+
+(* Two name/value pairs separated by an ampersand. *)
+let t200() =
+  dest_url_encoded_parameters "a=b&c=d" = [ ("a", "b"); ("c", "d") ]
+;;
+
+
+(* The first parameter has an empty value. *)
+let t201() =
+  dest_url_encoded_parameters "a=&c=d" = [ ("a", ""); ("c", "d") ]
+;;
+
+
+(* Both parameters have empty values. *)
+let t202() =
+  dest_url_encoded_parameters "a=&c=" = [ ("a", ""); ("c", "") ]
+;;
+
+
+(* The empty string yields no parameters at all. *)
+let t203() =
+  dest_url_encoded_parameters "" = []
+;;
+
+
+(* Percent-encoded name and value are both decoded. *)
+let t204() =
+  dest_url_encoded_parameters "%41=%42" = [ ("A", "B") ]
+;;
+
+
+(**********************************************************************)
+
+(* Runs the check f and prints the verdict for test number n on stdout;
+ * stdout is flushed afterwards.
+ *)
+let test f n =
+  let verdict = if f() then " ok" else " FAILED!!!!" in
+  print_endline ("Test " ^ n ^ verdict);
+  flush stdout
+;;
+
+
+(* Wraps dest_form_encoded_parameters (called with default_config) so that
+ * it returns (name, mimetype, value) triples, passes this wrapper to the
+ * test function f, and prints the verdict for test number n.
+ *)
+let test_dest_form_encoded_parameters f n =
+  let triple a = (arg_name a, arg_mimetype a, arg_value a) in
+  let dest s b =
+    List.map triple (dest_form_encoded_parameters s b default_config)
+  in
+  let verdict = if f dest then " ok" else " FAILED!!!!" in
+  print_endline ("Test dest_form_encoded_parameters " ^ n ^ verdict);
+  flush stdout
+;;
+
+
+(* fill_stream s: creates a pipe and forks. The child process writes s to
+ * the write end and exits; the parent returns an input channel on the
+ * read end.
+ * NOTE(review): the pid is only used to tell child from parent and the
+ * child is never waited for, so each call presumably leaves a zombie
+ * process behind until the toploop exits - confirm whether that matters.
+ *)
+let fill_stream s =
+ (* Returns a channel that reads from string s.
+ * This requires forking.
+ *)
+ let rd, wr = Unix.pipe() in
+ let pid = Unix.fork() in
+ if pid = 0 then begin
+ (* Child: it only writes, so the read end is closed first. *)
+ Unix.close rd;
+ let out = Unix.out_channel_of_descr wr in
+ output_string out s;
+ close_out out;
+ exit(0);
+ end;
+ (* Parent: it only reads, so its copy of the write end is closed;
+ * presumably this is what lets the reader see end of file once the
+ * child has finished writing.
+ *)
+ Unix.close wr;
+ Unix.in_channel_of_descr rd
+;;
+
+
+(* Like test_dest_form_encoded_parameters, but the message is read through
+ * a netstream fed from a pipe (see fill_stream). The third argument of
+ * Netstream.create_from_channel is twice the length of the boundary
+ * string b (presumably the block size of the stream - confirm).
+ *
+ * The input channel is closed once the arguments have been converted to
+ * (name, mimetype, value) triples, and also when parsing raises an
+ * exception. Previously every call left one descriptor open.
+ * NOTE(review): this assumes that the netstream does not need the
+ * channel any more after parsing has returned - confirm.
+ *)
+let test_dest_form_encoded_parameters_from_netstream f n =
+  let dest s b =
+    let ch = fill_stream s in
+    let triples =
+      try
+        let bs = String.length b * 2 in
+        let stream = Netstream.create_from_channel ch None bs in
+        let args =
+          dest_form_encoded_parameters_from_netstream
+            stream b default_config in
+(*
+        List.iter
+          (fun a ->
+             Printf.printf "name=%s mimetype=%s value=%s\n"
+               (arg_name a) (arg_mimetype a) (arg_value a))
+          args;
+*)
+        List.map
+          (fun a -> arg_name a, arg_mimetype a, arg_value a)
+          args
+      with
+        any ->
+          (* Do not leak the descriptor when the parser fails. *)
+          close_in ch;
+          raise any
+    in
+    close_in ch;
+    triples
+  in
+  let verdict = if f dest then " ok" else " FAILED!!!!" in
+  print_endline
+    ("Test dest_form_encoded_parameters_from_netstream " ^ n ^ verdict);
+  flush stdout
+;;
+
+
+
+(* Run the multipart tests twice - first on plain strings, then through a
+ * netstream - followed by the encode/decode tests and the tests for
+ * URL-encoded parameters. The order of execution is the same as when
+ * every test is invoked by a phrase of its own.
+ *)
+let form_tests =
+  [ t001, "001"; t002, "002"; t003, "003"; t004, "004";
+    t005, "005"; t006, "006"; t007, "007"; t008, "008";
+    t009, "009"; t010, "010"; t011, "011"; t012, "012" ]
+in
+List.iter
+  (fun (t, n) -> test_dest_form_encoded_parameters t n)
+  form_tests;
+List.iter
+  (fun (t, n) -> test_dest_form_encoded_parameters_from_netstream t n)
+  form_tests;
+List.iter
+  (fun (t, n) -> test t n)
+  [ t100, "100"; t101, "101"; t102, "102";
+    t200, "200"; t201, "201"; t202, "202"; t203, "203"; t204, "204" ];;
--- /dev/null
+#! /bin/sh
+# (*
+exec /opt/ocaml-2.04/bin/ocamlfattop "$0" "$@"
+*) directory ".";;
+
+#directory "..";;
+#load "netstring.cma";;
+
+(* Emits the CGI header, parses the request (every argument is processed
+ * as Cgi.File), lists all received parameters and finally prints four
+ * forms that post back to this script.
+ *)
+Cgi.header "";
+Cgi.parse_arguments
+  { Cgi.default_config with
+      Cgi.how_to_process_arguments = (fun _ -> Cgi.File)
+  };
+let params = Cgi.arguments() in
+(* Names, MIME types, file names and values are taken from the request,
+ * so they are HTML-escaped before they are echoed into the page.
+ *)
+let esc = Netencoding.Html.encode_from_latin1 in
+print_string "<html><body>\n";
+print_string "<h1>Parameters:</h1>\n";
+print_string "<ul>\n";
+List.iter
+  (fun (n,a) ->
+     print_string "<li>";
+     print_string (esc n);
+     print_string ":";
+     print_string (esc (Cgi.arg_mimetype a));
+     print_string "=";
+     (match Cgi.arg_filename a with
+          None -> ()
+        | Some fn -> print_string ("[filename=" ^ esc fn ^ "]")
+     );
+     print_string (esc (Cgi.arg_value a));
+     print_string "</li>\n";
+  )
+  params;
+
+Cgi.cleanup();
+
+print_string "</ul>\n";
+
+print_string "<h1>GET URL-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=GET>\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>POST URL-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST>\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>POST FORM-encoded form</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=text name=\"sträange\">\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+print_string "<h1>File upload</h1>\n";
+print_string "<form action=\"test_encoding.cgi\" method=POST enctype=\"multipart/form-data\">\n";
+print_string "<input type=text name=line>\n";
+print_string "<input type=file name=file>\n";
+print_string "<input type=submit name=submit value=\"Submit\">\n";
+print_string "</form>\n";
+
+
+
+print_string "</body></html>\n";
+
+flush stdout
+;;
+
+
--- /dev/null
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+open Mimestring;;
+
+(**********************************************************************)
+(* scan_structured_value *)
+(**********************************************************************)
+
+(* A plain address: the specials '@' and '.' separate the atoms. *)
+let t001() =
+  let expected =
+    [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] in
+  scan_structured_value "user@domain.com" [ '@'; '.' ] [] = expected
+;;
+
+
+(* Blanks around the specials do not show up in the token list. *)
+let t002() =
+  let expected =
+    [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] in
+  scan_structured_value "user @ domain . com" [ '@'; '.' ] [] = expected
+;;
+
+
+(* A parenthesized comment is dropped when no option asks for it. *)
+let t003() =
+  let expected =
+    [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] in
+  scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] []
+  = expected
+;;
+
+
+(* With the blank declared as special, every blank becomes a token. *)
+let t004() =
+  let expected =
+    [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] in
+  scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] []
+  = expected
+;;
+
+
+(* With the opening parenthesis declared as special, no comment is
+ * recognized and the words inside are returned as atoms.
+ *)
+let t005() =
+  let expected =
+    [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know";
+      Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] in
+  scan_structured_value "user(Do you know him?)@domain.com"
+    ['@'; '.'; '('] []
+  = expected
+;;
+
+
+(* A special inside a quoted string does not split the string. *)
+let t006() =
+  let expected =
+    [ QString "My.name"; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] in
+  scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] [] = expected
+;;
+
+
+(* An escaped double quote, parentheses, specials and a blank inside a
+ * quoted string all end up in the QString value.
+ *)
+let t007() =
+  let expected =
+    [ QString "\"()@. "; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] in
+  scan_structured_value "\"\\\"()@. \"@domain.com" [ '@'; '.' ] []
+  = expected
+;;
+
+
+(* Nested comments are skipped as a whole. *)
+let t008() =
+  scan_structured_value "a(b(c(d)e)f)g" [] [] = [ Atom "a"; Atom "g" ]
+;;
+
+
+(* A comment that is never closed swallows the rest of the input. *)
+let t009() =
+  scan_structured_value "a(b(c(d)e)f" [] [] = [ Atom "a" ]
+;;
+
+
+(* Backslash-escaped parentheses inside a comment do not nest. *)
+let t010() =
+  scan_structured_value "a(b\\(c\\(d\\)e)f" [] [] = [ Atom "a"; Atom "f" ]
+;;
+
+
+(* An unclosed comment that ends in a lone backslash. *)
+let t011() =
+  scan_structured_value "a(b(c(d)e)f\\" [] [] = [ Atom "a" ]
+;;
+
+
+(* A quoted string without closing quote still yields a QString. *)
+let t012() =
+  scan_structured_value "\"abc" [] [] = [ QString "abc" ]
+;;
+
+
+(* An unclosed quoted string ending in a lone backslash keeps the
+ * backslash in its value.
+ *)
+let t013() =
+  scan_structured_value "\"abc\\" [] [] = [ QString "abc\\" ]
+;;
+
+
+(* New tests for netstring-0.9: *)
+
+(* With Return_comments the comment is reported as a Comment token. *)
+let t020() =
+  let expected =
+    [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.';
+      Atom "com" ] in
+  scan_structured_value "user(Do you know him?)@domain.com"
+    [ '@'; '.' ] [ Return_comments ]
+  = expected
+;;
+
+(* The blank is a special and comments are not returned: the test expects
+ * three blank tokens between the first atom and the '@'.
+ *)
+let t021() =
+  let expected =
+    [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@';
+      Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] in
+  scan_structured_value "user (Do you know him?) @ domain . com"
+    [ '@'; '.'; ' ' ] []
+  = expected
+;;
+
+(* Same input as t021, but with Return_comments: the comment appears as
+ * a Comment token between two blank tokens.
+ *)
+let t022() =
+  let expected =
+    [ Atom "user"; Special ' '; Comment; Special ' '; Special '@';
+      Special ' '; Atom "domain";
+      Special ' '; Special '.'; Special ' '; Atom "com" ] in
+  scan_structured_value "user (Do you know him?) @ domain . com"
+    [ '@'; '.'; ' ' ] [ Return_comments ]
+  = expected
+;;
+
+(* Without Recognize_encoded_words an encoded word is an ordinary atom. *)
+let t023() =
+  let word = "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" in
+  scan_structured_value word [] [] = [ Atom word ]
+;;
+
+(* With Recognize_encoded_words the word is split into charset, encoding
+ * and encoded text.
+ *)
+let t024() =
+  let expected =
+    [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ] in
+  scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?="
+    [ ] [ Recognize_encoded_words ]
+  = expected
+;;
+
+(* Two base64 encoded words separated by a blank give two tokens. *)
+let t025() =
+  let input =
+    "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" in
+  let expected =
+    [ EncodedWord
+        ("ISO-8859-1", "B", "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=");
+      EncodedWord
+        ("ISO-8859-2", "B", "dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==")
+    ] in
+  scan_structured_value input [] [ Recognize_encoded_words ] = expected
+;;
+
+(**********************************************************************)
+(* s_extended_token *)
+(**********************************************************************)
+
+(* Creates a MIME scanner for str with the given specials and options and
+ * returns the result of scan_token_list on it.
+ *)
+let scan specials options str =
+  scan_token_list (create_mime_scanner specials options str);;
+
+let t100() =
+ let r = scan [] [] "Two atoms" in
+ match r with
+ [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 3) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 4) &&
+ (get_line a2 = 1) &&
+ (get_column a2 = 4) &&
+ (get_length a2 = 5) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+
+(* Two atoms with two blanks in front of, between and behind them. The
+ * expected offsets (2 and 7) only hold for doubled blanks, so the blanks
+ * are written as \032 escapes; a literal run of blanks is easily
+ * collapsed and would silently invalidate the offsets.
+ *)
+let t101() =
+  let r = scan [] [] "\032\032Two\032\032atoms\032\032" in
+  match r with
+    [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 3) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 7) &&
+      (get_line a2 = 1) &&
+      (get_column a2 = 7) &&
+      (get_length a2 = 5) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+
+(* Two atoms on two lines. The first atom is expected at offset 2, which
+ * requires two leading blanks; the second one at offset 7 in column 1 of
+ * line 2, i.e. after the line feed and a single blank. The blanks are
+ * written as \032 escapes so that they cannot be collapsed.
+ *)
+let t102() =
+  let r = scan [] [] "\032\032Two\n\032atoms\032" in
+  match r with
+    [ a1, Atom "Two"; a2, Atom "atoms" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 3) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 7) &&
+      (get_line a2 = 2) &&
+      (get_column a2 = 1) &&
+      (get_length a2 = 5) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+let t110() =
+ let r = scan [] [] "\"Two\" \"qstrings\"" in
+ match r with
+ [ a1, QString "Two"; a2, QString "qstrings" ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 5) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 6) &&
+ (get_line a2 = 1) &&
+ (get_column a2 = 6) &&
+ (get_length a2 = 10) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+(* Two quoted strings with two blanks in front of and between them. The
+ * expected offsets (2 and 9, with a token length of 5 for the first
+ * string) only hold for doubled blanks, so the blanks are written as
+ * \032 escapes.
+ *)
+let t111() =
+  let r = scan [] [] "\032\032\"Two\"\032\032\"qstrings\"\032" in
+  match r with
+    [ a1, QString "Two"; a2, QString "qstrings" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 5) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 9) &&
+      (get_line a2 = 1) &&
+      (get_column a2 = 9) &&
+      (get_length a2 = 10) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+(* Two quoted strings that each contain a line feed. The first token is
+ * expected at offset 2 with length 11 and the second at offset 15
+ * (line 2, column 8); this requires two blanks in front of and between
+ * the strings, which are written as \032 escapes.
+ *)
+let t112() =
+  let r =
+    scan [] [] "\032\032\"Two\nlines\"\032\032\"and\nqstrings\"\032" in
+  match r with
+    [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 11) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 15) &&
+      (get_line a2 = 2) &&
+      (get_column a2 = 8) &&
+      (get_length a2 = 14) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+(* Like t112, but the line feeds inside the quoted strings are escaped
+ * with a backslash, which makes each token one character longer. The
+ * expected offsets (2 and 16) require two blanks in front of and between
+ * the strings; they are written as \032 escapes.
+ *)
+let t113() =
+  let r =
+    scan [] []
+      "\032\032\"Two\\\nlines\"\032\032\"and\\\nqstrings\"\032" in
+  match r with
+    [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 12) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 16) &&
+      (get_line a2 = 2) &&
+      (get_column a2 = 8) &&
+      (get_length a2 = 15) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+(* Domain literals are implemented like quoted strings, so only the
+ * most complicated test case (escaped line feeds) is checked.
+ * The expected offsets (2 and 16) require two blanks in front of and
+ * between the literals; they are written as \032 escapes so that they
+ * cannot be collapsed.
+ *)
+let t120() =
+  let r =
+    scan [] [] "\032\032[Two\\\nlines]\032\032[and\\\nliterals]\032" in
+  match r with
+    [ a1, DomainLiteral "Two\nlines"; a2, DomainLiteral "and\nliterals" ] ->
+      (get_pos a1 = 2) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 2) &&
+      (get_length a1 = 12) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 16) &&
+      (get_line a2 = 2) &&
+      (get_column a2 = 8) &&
+      (get_length a2 = 15) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+let t130() =
+ let r = scan [] [ Return_comments ] "(Two) (comments)" in
+ match r with
+ [ a1, Comment; a2, Comment ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 5) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 6) &&
+ (get_line a2 = 1) &&
+ (get_column a2 = 6) &&
+ (get_length a2 = 10) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+let t131() =
+ let r = scan [] [ Return_comments ] "(Two\nlines) (and\ncomments)" in
+ match r with
+ [ a1, Comment; a2, Comment ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 11) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 12) &&
+ (get_line a2 = 2) &&
+ (get_column a2 = 7) &&
+ (get_length a2 = 14) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+let t132() =
+ let r = scan [] [ Return_comments ] "(Two\\\nlines) (and\\\ncomments)" in
+ match r with
+ [ a1, Comment; a2, Comment ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 12) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 13) &&
+ (get_line a2 = 2) &&
+ (get_column a2 = 7) &&
+ (get_length a2 = 15) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+let t133() =
+ let r = scan [] [ Return_comments ] "(a\n(b\nc)d\ne(f)) atom" in
+ match r with
+ [ a1, Comment; a2, Atom "atom" ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 15) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 16) &&
+ (get_line a2 = 4) &&
+ (get_column a2 = 6) &&
+ (get_length a2 = 4) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+let t140() =
+ let r = scan [] [] "\031\031" in
+ match r with
+ [ a1, Control '\031'; a2, Control '\031' ] ->
+
+ (get_pos a1 = 0) &&
+ (get_line a1 = 1) &&
+ (get_column a1 = 0) &&
+ (get_length a1 = 1) &&
+ (separates_adjacent_encoded_words a1 = false) &&
+
+ (get_pos a2 = 1) &&
+ (get_line a2 = 1) &&
+ (get_column a2 = 1) &&
+ (get_length a2 = 1) &&
+ (separates_adjacent_encoded_words a2 = false)
+
+ | _ ->
+ false
+;;
+
+(* Tab and line feed are specials, blanks are skipped. The last tab is
+ * expected at offset 8 in column 2 of line 3, which requires two blanks
+ * on the second and on the third line. The blanks are written as \032
+ * escapes so that they cannot be collapsed.
+ *)
+let t150() =
+  let r = scan [ '\t'; '\n' ] [] "\032\t\n\032\032\n\032\032\t" in
+  match r with
+    [ a1, Special '\t'; _, Special '\n'; _, Special '\n'; a2, Special '\t'] ->
+      (get_pos a1 = 1) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 1) &&
+      (get_length a1 = 1) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_pos a2 = 8) &&
+      (get_line a2 = 3) &&
+      (get_column a2 = 2) &&
+      (get_length a2 = 1) &&
+      (separates_adjacent_encoded_words a2 = false)
+  | _ ->
+      false
+;;
+
+(* Two Q-encoded words separated by one blank; the blank is not a special
+ * here. Charset and encoding are expected in upper case. The decoded
+ * word contains the byte 0xF6 (from =F6); it is written as the decimal
+ * escape \246 so that the test does not depend on the character encoding
+ * in which this file is stored.
+ *)
+let t160() =
+  let r = scan [] [ Recognize_encoded_words ]
+            "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match r with
+    [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      (get_pos a1 = 0) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 0) &&
+      (get_length a1 = 22) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_decoded_word a1 = "G\246rd") &&
+      (get_charset a1 = "ISO8859-1") &&
+      (get_pos a2 = 23) &&
+      (get_line a2 = 1) &&
+      (get_column a2 = 23) &&
+      (get_length a2 = 22) &&
+      (separates_adjacent_encoded_words a2 = false) &&
+      (get_decoded_word a2 = "G\246rd") &&
+      (get_charset a2 = "ISO8859-1")
+  | _ ->
+      false
+;;
+
+(* Like t160, but the blank is a special: the blank token between the two
+ * encoded words must be flagged by separates_adjacent_encoded_words.
+ * The decoded byte 0xF6 is written as the decimal escape \246 so that the
+ * test does not depend on the character encoding of this file.
+ *)
+let t161() =
+  let r = scan [ ' ' ] [ Recognize_encoded_words ]
+            "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in
+  match r with
+    [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      sp, Special ' ';
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      (get_pos a1 = 0) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 0) &&
+      (get_length a1 = 22) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_decoded_word a1 = "G\246rd") &&
+      (get_charset a1 = "ISO8859-1") &&
+      (get_pos a2 = 23) &&
+      (get_line a2 = 1) &&
+      (get_column a2 = 23) &&
+      (get_length a2 = 22) &&
+      (separates_adjacent_encoded_words a2 = false) &&
+      (get_decoded_word a2 = "G\246rd") &&
+      (get_charset a2 = "ISO8859-1") &&
+      (separates_adjacent_encoded_words sp = true)
+  | _ ->
+      false
+;;
+
+(* Like t161, but with two blanks between the encoded words: two blank
+ * tokens are expected, both flagged by separates_adjacent_encoded_words,
+ * and the second word starts at offset 24. The two blanks are written as
+ * \032 escapes so that they cannot be collapsed into one, and the decoded
+ * byte 0xF6 is written as \246 so that the test does not depend on the
+ * character encoding of this file.
+ *)
+let t162() =
+  let r = scan [ ' ' ] [ Recognize_encoded_words ]
+            "=?iso8859-1?q?G=F6rd?=\032\032=?iso8859-1?q?G=F6rd?=" in
+  match r with
+    [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd");
+      sp1, Special ' ';
+      sp2, Special ' ';
+      a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] ->
+      (get_pos a1 = 0) &&
+      (get_line a1 = 1) &&
+      (get_column a1 = 0) &&
+      (get_length a1 = 22) &&
+      (separates_adjacent_encoded_words a1 = false) &&
+      (get_decoded_word a1 = "G\246rd") &&
+      (get_charset a1 = "ISO8859-1") &&
+      (get_pos a2 = 24) &&
+      (get_line a2 = 1) &&
+      (get_column a2 = 24) &&
+      (get_length a2 = 22) &&
+      (separates_adjacent_encoded_words a2 = false) &&
+      (get_decoded_word a2 = "G\246rd") &&
+      (get_charset a2 = "ISO8859-1") &&
+      (separates_adjacent_encoded_words sp1 = true) &&
+      (separates_adjacent_encoded_words sp2 = true)
+  | _ ->
+      false
+;;
+
+
+
+(**********************************************************************)
+
+(* Runs the check f and prints the verdict for test number n on stdout;
+ * stdout is flushed afterwards.
+ *)
+let test f n =
+  let verdict = if f() then " ok" else " FAILED!!!!" in
+  print_endline ("Test " ^ n ^ verdict);
+  flush stdout
+;;
+
+(* Run all tests in numerical order. *)
+List.iter
+  (fun (t, n) -> test t n)
+  [ t001, "001"; t002, "002"; t003, "003"; t004, "004"; t005, "005";
+    t006, "006"; t007, "007"; t008, "008"; t009, "009"; t010, "010";
+    t011, "011"; t012, "012"; t013, "013";
+
+    t020, "020"; t021, "021"; t022, "022"; t023, "023"; t024, "024";
+    t025, "025";
+
+    t100, "100"; t101, "101"; t102, "102"; t110, "110"; t111, "111";
+    t112, "112"; t113, "113"; t120, "120"; t130, "130"; t131, "131";
+    t132, "132"; t133, "133"; t140, "140"; t150, "150"; t160, "160";
+    t161, "161"; t162, "162" ];;
--- /dev/null
+#require "str";;
+#directory "..";;
+#load "netstring.cma";;
+
+
+open Netencoding;;
+
+(**********************************************************************)
+(* Base64 *)
+(**********************************************************************)
+
+(* Test strings:
+ * "", "a", "ab", "abc", "abcd", "abcde",
+ * "abcdefghijklmnopqrstuvwxyz".
+ *)
+
+(* ENCODE. No line breaks.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t001() =
+  Base64.encode "" = "" &&
+  Base64.encode "a" = "YQ==" &&
+  Base64.encode "ab" = "YWI=" &&
+  Base64.encode "abc" = "YWJj" &&
+  Base64.encode "abcd" = "YWJjZA==" &&
+  Base64.encode "abcde" = "YWJjZGU=" &&
+  Base64.encode "abcdefghijklmnopqrstuvwxyz" =
+    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="
+;;
+
+
+(* ENCODE. Lines with length of 4, separated by LF.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t002() =
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 4 false = "" &&
+  Base64.encode_substring abc 0 1 4 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 4 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 4 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 4 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 4 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 4 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+(* ENCODE. Lines with length of 5, separated by LF. The expected output
+ * is the same as for a line length of 4.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t003() =
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 5 false = "" &&
+  Base64.encode_substring abc 0 1 5 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 5 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 5 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 5 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 5 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 5 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+(* ENCODE. Lines with length of 7, separated by LF. The expected output
+ * is the same as for a line length of 4.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t004() =
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 7 false = "" &&
+  Base64.encode_substring abc 0 1 7 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 7 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 7 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 7 false = "YWJj\nZA==\n" &&
+  Base64.encode_substring abc 0 5 7 false = "YWJj\nZGU=\n" &&
+  Base64.encode_substring abc 0 26 7 false =
+    "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n"
+;;
+
+
+(* ENCODE. Lines with length of 8, separated by LF.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t005() =
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 false = "" &&
+  Base64.encode_substring abc 0 1 8 false = "YQ==\n" &&
+  Base64.encode_substring abc 0 2 8 false = "YWI=\n" &&
+  Base64.encode_substring abc 0 3 8 false = "YWJj\n" &&
+  Base64.encode_substring abc 0 4 8 false = "YWJjZA==\n" &&
+  Base64.encode_substring abc 0 5 8 false = "YWJjZGU=\n" &&
+  Base64.encode_substring abc 0 26 8 false =
+    "YWJjZGVm\nZ2hpamts\nbW5vcHFy\nc3R1dnd4\neXo=\n"
+;;
+
+
+(* ENCODE. Lines with length of 8, separated by CRLF.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t006() =
+  let abc = "abcdefghijklmnopqrstuvwxyz" in
+  Base64.encode_substring abc 0 0 8 true = "" &&
+  Base64.encode_substring abc 0 1 8 true = "YQ==\r\n" &&
+  Base64.encode_substring abc 0 2 8 true = "YWI=\r\n" &&
+  Base64.encode_substring abc 0 3 8 true = "YWJj\r\n" &&
+  Base64.encode_substring abc 0 4 8 true = "YWJjZA==\r\n" &&
+  Base64.encode_substring abc 0 5 8 true = "YWJjZGU=\r\n" &&
+  Base64.encode_substring abc 0 26 8 true =
+    "YWJjZGVm\r\nZ2hpamts\r\nbW5vcHFy\r\nc3R1dnd4\r\neXo=\r\n"
+;;
+
+
+(* DECODE. First test without spaces.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t020() =
+  Base64.decode_substring "" 0 0 false false = "" &&
+  Base64.decode_substring "YQ==" 0 4 false false = "a" &&
+  Base64.decode_substring "YWI=" 0 4 false false = "ab" &&
+  Base64.decode_substring "YWJj" 0 4 false false = "abc" &&
+  Base64.decode_substring "YWJjZA==" 0 8 false false = "abcd" &&
+  Base64.decode_substring "YWJjZGU=" 0 8 false false = "abcde" &&
+  Base64.decode_substring
+    "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" 0 36 false false =
+    "abcdefghijklmnopqrstuvwxyz"
+;;
+
+
+(* DECODE. With spaces (last argument true).
+ * Uses && instead of the deprecated operator &.
+ *)
+let t021() =
+  Base64.decode_substring " \r\n\t" 0 4 false true = "" &&
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 false true = "abcde"
+;;
+
+
+(* DECODE. With URL characters and spaces: in this mode a dot is expected
+ * to be accepted in place of the padding character.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t022() =
+  Base64.decode_substring " Y W J j\n Z G U = " 0 18 true true = "abcde" &&
+  Base64.decode_substring " Y W J j\n Z G U . " 0 18 true true = "abcde"
+;;
+
+(**********************************************************************)
+(* Quoted Printable *)
+(**********************************************************************)
+
+(* ENCODE (quoted printable).
+ * Uses && instead of the deprecated operator &.
+ *)
+let t100() =
+  QuotedPrintable.encode "a %= 12345 &$[]\"" = "a %=3D 12345 &=24=5B=5D=22" &&
+  QuotedPrintable.encode "\000\001\002" = "=00=01=02" &&
+  QuotedPrintable.encode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &&
+  QuotedPrintable.encode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &&
+  QuotedPrintable.encode "abc \r\n def\nghi " = "abc=20\r\n def\nghi=20"
+;;
+
+
+(* DECODE (quoted printable). The last case checks a soft line break.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t120() =
+  QuotedPrintable.decode "a %=3D 12345 &=24=5B=5D=22" = "a %= 12345 &$[]\"" &&
+  QuotedPrintable.decode "=00=01=02" = "\000\001\002" &&
+  QuotedPrintable.decode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" &&
+  QuotedPrintable.decode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" &&
+  QuotedPrintable.decode "abc=20\r\n def\nghi=20" = "abc \r\n def\nghi " &&
+  QuotedPrintable.decode "abc=\r\n def\nghi=20" = "abc def\nghi "
+;;
+
+(**********************************************************************)
+(* Q *)
+(**********************************************************************)
+
+(* ENCODE (Q encoding).
+ * Uses && instead of the deprecated operator &.
+ *)
+let t200() =
+  Q.encode "a %= 12345 &$[]\"" = "a=20=25=3D=2012345=20=26=24=5B=5D=22" &&
+  Q.encode "\000\001\002\r\n" = "=00=01=02=0D=0A"
+;;
+
+
+(* DECODE (Q encoding). The last case uses lower-case hex digits.
+ * Uses && instead of the deprecated operator &.
+ *)
+let t220() =
+  Q.decode "a=20=25=3D=2012345=20=26=24=5B=5D=22" = "a %= 12345 &$[]\"" &&
+  Q.decode "=00=01=02=0D=0A" = "\000\001\002\r\n" &&
+  Q.decode "a=20=25=3d=2012345=20=26=24=5b=5d=22" = "a %= 12345 &$[]\""
+;;
+
+(**********************************************************************)
+(* Url *)
+(**********************************************************************)
+
+(* Already tested for Cgi *)
+
+(**********************************************************************)
+(* Html *)
+(**********************************************************************)
+
+(* ENCODE (HTML). Markup characters, Latin-1 letters, the non-breaking
+ * space and a control character must come out as entities; the
+ * apostrophe is expected to pass through unchanged.
+ * The non-ASCII input bytes are written as decimal escapes so that the
+ * test does not depend on the character encoding of this file.
+ * NOTE(review): the expected text was reconstructed from a copy in which
+ * the entities had been replaced by the characters they denote; confirm
+ * the exact spellings (named or numeric) against the library.
+ *)
+let t300() =
+  Html.encode_from_latin1 "<>&\"abcdef\228\246\220\160\025'" =
+    "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;'"
+;;
+
+
+(* DECODE (HTML). Known entities and numeric references below 256 are
+ * decoded; an unknown entity name and a numeric reference that does not
+ * fit into Latin-1 are expected to be left as they are.
+ * The non-ASCII bytes are written as decimal escapes so that the test
+ * does not depend on the character encoding of this file, and && is used
+ * instead of the deprecated operator &.
+ * NOTE(review): the entity texts were reconstructed from a copy in which
+ * they had been replaced by the characters they denote. In particular
+ * the input of the second case (a reference for the apostrophe) may have
+ * been spelled differently - confirm against the original file.
+ *)
+let t320() =
+  Html.decode_to_latin1
+    "&lt;&gt;&amp;&quot;abcdef&auml;&ouml;&Uuml;&nbsp;&#25;" =
+    "<>&\"abcdef\228\246\220\160\025" &&
+  Html.decode_to_latin1 "&#39;" = "'" &&
+  Html.decode_to_latin1 "&nonsense;" = "&nonsense;" &&
+  Html.decode_to_latin1 "&#256;" = "&#256;"
+;;
+
+
+(**********************************************************************)
+
+(* [test f n] runs the test thunk [f] and reports the outcome for the test
+ * named [n] on stdout, one line per test.
+ *)
+let test f n =
+  let verdict = if f() then " ok" else " FAILED!!!!" in
+  print_endline ("Test " ^ n ^ verdict);
+  flush stdout
+;;
+
+(* Run all tests in ascending order of their numbers. *)
+List.iter
+  (fun (f, n) -> test f n)
+  [ t001, "001";
+    t002, "002";
+    t003, "003";
+    t004, "004";
+    t005, "005";
+    t006, "006";
+
+    t020, "020";
+    t021, "021";
+    t022, "022";
+
+    t100, "100";
+    t120, "120";
+
+    t200, "200";
+    t220, "220";
+
+    t300, "300";
+    t320, "320" ]
--- /dev/null
+#directory "..";;
+#load "netstring.cma";;
+
+open Neturl;;
+
+
+(* [expect_malformed_url f] runs the thunk [f], throws its result away and
+ * answers [true] only if [f] raised [Malformed_URL]. Any other exception
+ * is not caught here.
+ *)
+let expect_malformed_url f =
+  try
+    let _ = f() in
+    false
+  with
+    Malformed_URL -> true;;
+
+(* [works f] is the opposite of [expect_malformed_url f]: it answers [true]
+ * if the thunk [f] returns without raising [Malformed_URL].
+ *)
+let works f =
+  if expect_malformed_url f then false else true
+;;
+
+(**********************************************************************)
+(* extract_url_scheme *)
+(**********************************************************************)
+
+let t001 () =
+  (* Each pair is (URL, expected scheme). The second pair expects the
+   * scheme to be returned in lowercase.
+   *)
+  let scheme_is (url, expected) =
+    extract_url_scheme url = expected in
+  List.for_all scheme_is
+    [ "a:bc", "a";
+      "A:bc", "a";
+      "a:b:c", "a";
+      "a+b-c:d:e", "a+b-c" ]
+;;
+
+
+let t002 () =
+  (* Every string in the list must be rejected by extract_url_scheme
+   * with Malformed_URL.
+   *)
+  let rejected s =
+    try
+      let _ = extract_url_scheme s in
+      false
+    with
+      Malformed_URL -> true
+  in
+  List.for_all rejected [ "a"; "a/b:c"; "%61:b"; "a%3ab" ]
+;;
+
+(**********************************************************************)
+(* url_syntax *)
+(**********************************************************************)
+
+(* [hashtbl_for_all f h] is [true] iff [f key value] holds for every
+ * binding of [h]. The whole table is always traversed, but [f] is no
+ * longer called once a binding has failed.
+ *)
+let hashtbl_for_all f h =
+  let all_ok = ref true in
+  let check k v =
+    if !all_ok then all_ok := f k v in
+  Hashtbl.iter check h;
+  !all_ok
+;;
+
+let t010 () =
+  (* The two predefined syntaxes and every entry of common_url_syntax
+   * must pass url_syntax_is_valid.
+   *)
+  List.for_all url_syntax_is_valid [ null_url_syntax; ip_url_syntax ] &&
+  hashtbl_for_all (fun _ -> url_syntax_is_valid) common_url_syntax
+;;
+
+let t011 () =
+  (* Same as the previous test, but for the partial variant of every
+   * syntax.
+   *)
+  let partial_is_valid syn =
+    url_syntax_is_valid (partial_url_syntax syn) in
+  List.for_all partial_is_valid [ null_url_syntax; ip_url_syntax ] &&
+  hashtbl_for_all (fun _ -> partial_is_valid) common_url_syntax
+;;
+
+let t012 () =
+  (* partial_url_syntax is expected to turn every "required" part into an
+   * "allowed" one, to leave the other parts alone and to keep the very
+   * same validation function.
+   *)
+  let accept_all = fun _ -> true in
+  let strict =
+    { url_enable_scheme = Url_part_not_recognized;
+      url_enable_user = Url_part_required;
+      url_enable_password = Url_part_allowed;
+      url_enable_host = Url_part_required;
+      url_enable_port = Url_part_not_recognized;
+      url_enable_path = Url_part_required;
+      url_enable_param = Url_part_not_recognized;
+      url_enable_query = Url_part_not_recognized;
+      url_enable_fragment = Url_part_required;
+      url_enable_other = Url_part_not_recognized;
+      url_accepts_8bits = false;
+      url_is_valid = accept_all;
+    } in
+  let relaxed = partial_url_syntax strict in
+
+  (* The "enable" fields in declaration order: *)
+  let enable_flags s =
+    [ s.url_enable_scheme;
+      s.url_enable_user;
+      s.url_enable_password;
+      s.url_enable_host;
+      s.url_enable_port;
+      s.url_enable_path;
+      s.url_enable_param;
+      s.url_enable_query;
+      s.url_enable_fragment;
+      s.url_enable_other ] in
+
+  enable_flags relaxed =
+    [ Url_part_not_recognized;
+      Url_part_allowed;
+      Url_part_allowed;
+      Url_part_allowed;
+      Url_part_not_recognized;
+      Url_part_allowed;
+      Url_part_not_recognized;
+      Url_part_not_recognized;
+      Url_part_allowed;
+      Url_part_not_recognized ] &&
+  (relaxed.url_is_valid == accept_all) &&
+
+  url_syntax_is_valid strict &&
+  url_syntax_is_valid relaxed
+;;
+
+(**********************************************************************)
+(* make_url *)
+(**********************************************************************)
+
+let t020 () =
+  (* Basic functionality. The components are passed unencoded (the
+   * default); the accessors must return them unchanged, and the encoded
+   * views as well as the printed URL must carry the '%' escapes.
+   *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let secret = "%()~$@" in
+  let segments = ["";"?";""] in
+
+  let url =
+    make_url
+      ~scheme:"http"
+      ~user:"U"
+      ~password:secret
+      ~host:"a.b.c"
+      ~port:81
+      ~path:segments
+      syntax in
+
+  (* Exactly the components passed above are present: *)
+  url_provides
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+    url &&
+  not
+    (url_provides
+       ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+       ~query:true url) &&
+
+  (* The URL refers to the very syntax record it was made with: *)
+  (url_syntax_of_url url == syntax) &&
+
+  (* Decoded views: *)
+  ("http" = url_scheme url) &&
+  ("U" = url_user url) &&
+  (secret = url_password url) &&
+  ("a.b.c" = url_host url) &&
+  (81 = url_port url) &&
+  (segments = url_path url) &&
+
+  (* Encoded views: *)
+  ("U" = url_user ~encoded:true url) &&
+  ("%25()%7E$%40" = url_password ~encoded:true url) &&
+  (["";"%3F";""] = url_path ~encoded:true url) &&
+
+  "http://U:%25()%7E$%40@a.b.c:81/%3F/" = string_of_url url
+;;
+
+
+let t021 () =
+  (* Basic functionality. The components are passed already encoded; the
+   * plain accessors must return the decoded values, while the encoded
+   * views and the printed URL must keep the escapes exactly as they were
+   * passed in (including the lowercase "%7e").
+   *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let enc_user = "%55" in
+  let enc_secret = "%25()%7e$%40" in
+  let enc_segments = ["";"%3F";""] in
+
+  let url =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:enc_user
+      ~password:enc_secret
+      ~host:"a.b.c"
+      ~port:81
+      ~path:enc_segments
+      syntax in
+
+  (* Exactly the components passed above are present: *)
+  url_provides
+    ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+    url &&
+  not
+    (url_provides
+       ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true
+       ~query:true url) &&
+
+  (* The URL refers to the very syntax record it was made with: *)
+  (url_syntax_of_url url == syntax) &&
+
+  (* Decoded views: *)
+  ("http" = url_scheme url) &&
+  ("U" = url_user url) &&
+  ("%()~$@" = url_password url) &&
+  ("a.b.c" = url_host url) &&
+  (81 = url_port url) &&
+  (["";"?";""] = url_path url) &&
+
+  (* Encoded views: *)
+  (enc_user = url_user ~encoded:true url) &&
+  (enc_secret = url_password ~encoded:true url) &&
+  (enc_segments = url_path ~encoded:true url) &&
+
+  "http://%55:%25()%7e$%40@a.b.c:81/%3F/" = string_of_url url
+;;
+
+
+(* NEGATIVE TESTS *)
+
+let t030 () =
+  (* It is not possible to add a component which is not recognized:
+   * the test expects a fragment to be refused for the "http" syntax.
+   *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+  let with_fragment () =
+    make_url
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"%()~$@"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"?";""]
+      ~fragment:"abc"
+      syntax
+  in
+  expect_malformed_url with_fragment
+;;
+
+
+let t031 () =
+  (* It is not possible to put malformed '%'-encodings into the URL *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+
+  (* All cases differ only in the (already encoded) password: *)
+  let with_password pw () =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:pw
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      syntax
+  in
+
+  works (with_password "XX") &&               (* reference *)
+  expect_malformed_url (with_password "%XX") &&
+  expect_malformed_url (with_password "%X") &&
+  expect_malformed_url (with_password "%")
+;;
+
+let t032 () =
+  (* It is not possible to put unsafe characters into the URL *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+
+  let with_password pw () =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:pw
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      syntax
+  in
+  let refused c =
+    expect_malformed_url (with_password (String.make 1 c)) in
+
+  (* List of unsafe characters taken from RFC1738.
+   * Note: '#' would be considered as reserved if fragments were enabled
+   *)
+  let unsafe =
+    [ '<'; '>'; '"'; '#'; '%'; '{'; '}'; '|'; '\\'; '^'; '['; ']'; '`';
+      '~' ] in
+
+  works (with_password "a") &&                (* reference *)
+  List.for_all refused unsafe &&
+  (* Note: '~' is considered as safe in paths: *)
+  works
+    (fun () ->
+       make_url
+         ~encoded:true
+         ~scheme:"http"
+         ~user:"U"
+         ~password:"a"
+         ~host:"a.b.c"
+         ~port:81
+         ~path:["";"~";""]
+         syntax)
+;;
+
+let t033 () =
+  (* It is not possible to put reserved characters into the URL *)
+  let syntax = Hashtbl.find common_url_syntax "http" in
+
+  (* Three constructors which place the character c into the password,
+   * into a path segment, or into the query string, respectively:
+   *)
+  let url_with_password c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:(String.make 1 c)
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      syntax
+  in
+  let url_with_segment c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";String.make 1 c;""]
+      syntax
+  in
+  let url_with_query c =
+    make_url
+      ~encoded:true
+      ~scheme:"http"
+      ~user:"U"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~port:81
+      ~path:["";"a";""]
+      ~query:(String.make 1 c)
+      syntax
+  in
+
+  (* [accepted make chars]: every character can be used with [make];
+   * [rejected make chars]: every character leads to Malformed_URL.
+   *)
+  let accepted make chars =
+    List.for_all (fun c -> works (fun () -> make c)) chars in
+  let rejected make chars =
+    List.for_all (fun c -> expect_malformed_url (fun () -> make c)) chars in
+
+  (* Note: There is a difference between RFC 1738 and RFC 1808 regarding
+   * which characters are reserved. RFC 1808 defines a fixed set of characters
+   * as reserved while RFC 1738 defines the reserved characters depending
+   * on the scheme.
+   * This implementation of URLs follows RFC 1738 (because of practical
+   * reasons).
+   *)
+
+  (* reference: *)
+  accepted url_with_password ['a'] &&
+  accepted url_with_segment ['a'] &&
+  accepted url_with_query ['a'] &&
+
+  rejected url_with_password [':'; '@'; '/'] &&
+  accepted url_with_password [';'; '?'; '='; '&'] &&
+
+  (* Note: ';' is allowed in path and query because parameters are not
+   * recognized in HTTP syntax.
+   *)
+
+  rejected url_with_segment ['/'; '?'] &&
+  accepted url_with_segment [':'; '@'; ';'; '='; '&'] &&
+
+  rejected url_with_query ['?'] &&
+  accepted url_with_query ['/'; ':'; '@'; ';'; '='; '&']
+;;
+
+
+let t034 () =
+  (* It is not possible to create a URL with a password, but without user;
+   * and neither to create a URL with a port, but without host;
+   * and neither to create a URL with a user, but without host
+   *)
+  let password_without_user () =
+    make_url
+      ~scheme:"http"
+      ~password:"a"
+      ~host:"a.b.c"
+      ~path:["";"a";""]
+      ip_url_syntax
+  in
+  let user_without_host () =
+    make_url
+      ~scheme:"http"
+      ~user:"U"
+      ~path:["";"a";""]
+      ip_url_syntax
+  in
+  let port_without_host () =
+    make_url
+      ~scheme:"http"
+      ~port:81
+      ~path:["";"a";""]
+      ip_url_syntax
+  in
+  List.for_all
+    expect_malformed_url
+    [ password_without_user; user_without_host; port_without_host ]
+;;
+
+
+let t035 () =
+  (* It is not possible to create a URL with illegal scheme prefix *)
+  let with_scheme s () =
+    make_url
+      ~scheme:s
+      ip_url_syntax
+  in
+  let refused s = expect_malformed_url (with_scheme s) in
+
+  works (with_scheme "a") &&                  (* reference *)
+  List.for_all refused [ ":"; "a=b"; "a%62b"; "a&b" ]
+;;
+
+
+let t036 () =
+  (* It is not possible to have a path with double slashes *)
+  let with_path p () =
+    make_url
+      ~path:p
+      ip_url_syntax
+  in
+  let refused p = expect_malformed_url (with_path p) in
+
+  works (with_path ["";"a";""]) &&            (* reference *)
+  List.for_all refused
+    [ ["";""];
+      ["a";"";""];
+      ["";"";"a"];
+      ["a";"";"a"] ]
+;;
+
+
+let t037 () =
+  (* It is not possible to have port numbers outside 0..65535 *)
+  let with_port p () =
+    make_url
+      ~host:"a"
+      ~port:p
+      ip_url_syntax
+  in
+  works (with_port 1) &&                      (* reference *)
+  expect_malformed_url (with_port (-1)) &&
+  expect_malformed_url (with_port 65536)
+;;
+
+
+let t038 () =
+  (* Several cases which are not allowed. *)
+
+  (* illegal: host + relative path *)
+  let host_with_relative_path () =
+    make_url
+      ~host:"a"
+      ~path:["a"]
+      ip_url_syntax
+  in
+  (* illegal: host + no path + params *)
+  let params_without_path () =
+    make_url
+      ~host:"a"
+      ~path:[]
+      ~param:["x"]
+      ip_url_syntax
+  in
+  (* illegal: host + no path + query *)
+  let query_without_path () =
+    make_url
+      ~host:"a"
+      ~path:[]
+      ~query:"x"
+      ip_url_syntax
+  in
+  List.for_all
+    expect_malformed_url
+    [ host_with_relative_path; params_without_path; query_without_path ]
+;;
+
+(**********************************************************************)
+(* url_of_string *)
+(**********************************************************************)
+
+let t050 () =
+  (* absolute URLs with ip_url_syntax.
+   * Most cases are the combination of one of five "authority" prefixes
+   * with a tail (path, parameters, query, fragment). For every tail, the
+   * prefixes are tried in the order given in [authorities].
+   *)
+  let round_trips s =
+    let printed = string_of_url (url_of_string ip_url_syntax s) in
+    printed = s in
+
+  let rejected s =
+    try
+      let _ = url_of_string ip_url_syntax s in
+      false
+    with
+      Malformed_URL -> true
+  in
+
+  let authorities =
+    [ "http://host";
+      "http://user@host";
+      "http://user:password@host";
+      "http://user@host:99";
+      "http://user:password@host:99" ] in
+
+  let for_each_authority check tails =
+    List.for_all
+      (fun tail ->
+         List.for_all (fun auth -> check (auth ^ tail)) authorities)
+      tails
+  in
+
+  round_trips "http:" &&
+
+  for_each_authority round_trips
+    [ ""; "/"; "/a/b"; "/a/b/"; "/?a=b&c=d" ] &&
+
+  (* A query directly after the authority must be refused: *)
+  for_each_authority rejected [ "?a=b&c=d" ] &&
+
+  for_each_authority round_trips [ "/?a=/&c=/"; "/;a;b" ] &&
+
+  (* Parameters directly after the authority must be refused: *)
+  for_each_authority rejected [ ";a;b" ] &&
+
+  for_each_authority round_trips [ "/;a;b?a=b&c=d" ] &&
+
+  round_trips "http:#f" &&
+
+  for_each_authority round_trips [ "#f"; "/;a;b?a=b&c=d#f" ]
+;;
+
+
+let t051 () =
+  (* relative URLs with ip_url_syntax.
+   * Every case checks that parsing a string and printing the result
+   * gives back the same string. The cases are the combinations of a
+   * "head" (authority or path) with a "tail" (parameters, query,
+   * fragment); for every tail, the heads are tried in the given order.
+   * The checks stop at the first failing combination.
+   * (The local helper for expected failures was never used in this test
+   * and has been removed.)
+   *)
+  let round_trips s =
+    string_of_url (url_of_string ip_url_syntax s) = s in
+
+  let each_combination heads tails =
+    List.for_all
+      (fun tail ->
+         List.for_all (fun head -> round_trips (head ^ tail)) heads)
+      tails
+  in
+
+  let authorities =
+    [ "//host";
+      "//user@host";
+      "//user:password@host";
+      "//user@host:99";
+      "//user:password@host:99" ] in
+
+  (* The tails which are combined with every path: *)
+  let tails =
+    [ "";
+      ";a;b";
+      "?a=b&c=d";
+      ";a;b?a=b&c=d";
+      "#f";
+      ";a;b#f";
+      ";a;b?a=b&c=d#f" ] in
+
+  (* Network paths without scheme: *)
+  each_combination authorities [ ""; "/"; "#f" ] &&
+
+  (* Absolute paths: *)
+  each_combination [ "/"; "/a"; "/a/"; "/a/a" ] tails &&
+
+  (* Relative paths, including the empty one: *)
+  each_combination [ ""; "a"; "a/"; "a/a" ] tails &&
+
+  (* Paths beginning with "." or "..": *)
+  each_combination [ "."; "./"; "./a" ] tails &&
+  each_combination [ ".."; "../"; "../a" ] tails &&
+
+  (* A colon in the first segment of a relative path: the unencoded
+   * segment is expected to be printed with "%3A", the encoded one with a
+   * "./" prefix.
+   *)
+  string_of_url
+    (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" &&
+
+  string_of_url
+    (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b"
+;;
+
+
+let t052 () =
+  (* mailto: URLs. The first two strings must survive parsing and
+   * printing unchanged; the one with a fragment must be refused.
+   *)
+  let syntax = Hashtbl.find common_url_syntax "mailto" in
+
+  let round_trips s =
+    let printed = string_of_url (url_of_string syntax s) in
+    printed = s in
+
+  let rejected s =
+    try
+      let _ = url_of_string syntax s in
+      false
+    with
+      Malformed_URL -> true
+  in
+
+  List.for_all round_trips [ "mailto:user@host"; "mailto:user@host;?;?" ] &&
+  rejected "mailto:user@host#f"
+;;
+
+(**********************************************************************)
+(* split_path/join_path/norm_path: *)
+(**********************************************************************)
+
+let t060 () =
+  (* Each pair is (path string, list of segments). split_path must map
+   * the string to the list, and join_path must map the list back to the
+   * string. All split_path cases are checked before the join_path cases.
+   *)
+  let cases =
+    [ "", [];
+      "/", [ "" ];
+      "/a", [ ""; "a" ];
+      "a", [ "a" ];
+      "a/", [ "a"; "" ];
+      "/a/", [ ""; "a"; "" ];
+      "/a/b", [ ""; "a"; "b" ];
+      "/a/b/", [ ""; "a"; "b"; "" ];
+      "/a/b/c", [ ""; "a"; "b"; "c" ] ] in
+
+  List.for_all (fun (s, segments) -> split_path s = segments) cases &&
+  List.for_all (fun (s, segments) -> join_path segments = s) cases
+;;
+
+
+let t061 () =
+  (* Each pair is (input path, expected result of norm_path). *)
+  let normalizes_to (input, expected) =
+    norm_path input = expected in
+  List.for_all normalizes_to
+    [ ["."], [];
+      ["."; ""], [];
+      ["a"; "."], ["a"; ""];
+      ["a"; "b"; "."], ["a"; "b"; ""];
+      ["a"; "b"; ".."], ["a"; ""];
+      ["a"; "."; "b"; "."], ["a"; "b"; ""];
+      [".."], [".."; ""];
+      [".."; ""], [".."; ""];
+      ["a"; "b"; ".."; "c" ], ["a"; "c"];
+      ["a"; "b"; ".."; "c"; ""], ["a"; "c"; ""];
+      ["";"";"a";"";"b"], [""; "a"; "b"];
+      ["a"; "b"; ""; ".."; "c"; ""], ["a"; "c"; ""];
+      ["a"; ".."], [];
+      ["";""], [""];
+      [""], [""];
+      [], [] ]
+;;
+
+(**********************************************************************)
+(* apply_relative_url: *)
+(**********************************************************************)
+
+let t070() =
+  (* Examples taken from RFC 1808.
+   * Each pair is (relative URL, expected result of applying it to the
+   * base URL); both strings are parsed with ip_url_syntax and the
+   * resulting URLs are compared.
+   *)
+  let url = url_of_string ip_url_syntax in
+  let base = url "http://a/b/c/d;p?q#f" in
+  let aru = apply_relative_url base in
+
+  let resolves_to (relative, expected) =
+    aru (url relative) = url expected in
+
+  List.for_all resolves_to
+    [ "g:h", "g:h";
+      "g", "http://a/b/c/g";
+      "./g", "http://a/b/c/g";
+      "g/", "http://a/b/c/g/";
+      "/g", "http://a/g";
+      "//g", "http://g";
+      "?y", "http://a/b/c/d;p?y";
+      "g?y", "http://a/b/c/g?y";
+      "g?y/./x", "http://a/b/c/g?y/./x";
+      "#s", "http://a/b/c/d;p?q#s";
+      "g#s", "http://a/b/c/g#s";
+      "g#s/./x", "http://a/b/c/g#s/./x";
+      "g?y#s", "http://a/b/c/g?y#s";
+      ";x", "http://a/b/c/d;x";
+      "g;x", "http://a/b/c/g;x";
+      "g;x?y#s", "http://a/b/c/g;x?y#s";
+      ".", "http://a/b/c/";
+      "./", "http://a/b/c/";
+      "..", "http://a/b/";
+      "../", "http://a/b/";
+      "../g", "http://a/b/g";
+      "../..", "http://a/";
+      "../../", "http://a/";
+      "../../g", "http://a/g";
+
+      "", "http://a/b/c/d;p?q#f";
+      "../../../g", "http://a/../g";
+      "../../../../g", "http://a/../../g";
+      "/./g", "http://a/./g";
+      "/../g", "http://a/../g";
+      "g.", "http://a/b/c/g.";
+      ".g", "http://a/b/c/.g";
+      "g..", "http://a/b/c/g..";
+      "..g", "http://a/b/c/..g";
+      "./../g", "http://a/b/g";
+      "./g/.", "http://a/b/c/g/";
+      "g/./h", "http://a/b/c/g/h";
+      "g/../h", "http://a/b/c/h";
+      "http:g", "http:g";
+      "http:", "http:" ]
+;;
+
+
+(**********************************************************************)
+
+(* [test f n] evaluates the test thunk [f] and prints one line for the
+ * test named [n] telling whether it succeeded.
+ *)
+let test f n =
+  let outcome =
+    match f() with
+      true -> "ok"
+    | false -> "FAILED!!!!" in
+  print_endline ("Test " ^ n ^ " " ^ outcome);
+  flush stdout
+;;
+
+(* Run all tests in ascending order of their numbers. *)
+List.iter
+  (fun (f, n) -> test f n)
+  [ t001, "001";
+    t002, "002";
+
+    t010, "010";
+    t011, "011";
+    t012, "012";
+
+    t020, "020";
+    t021, "021";
+
+    t030, "030";
+    t031, "031";
+    t032, "032";
+    t033, "033";
+    t034, "034";
+    t035, "035";
+    t036, "036";
+    t037, "037";
+    t038, "038";
+
+    t050, "050";
+    t051, "051";
+    t052, "052";
+
+    t060, "060";
+    t061, "061";
+
+    t070, "070" ];
+()
+;;
--- /dev/null
+
+
+(* [make_iso enc] concatenates the results of Netconversion.makechar for
+ * the code points 0 to 255 in ascending order; code points for which
+ * makechar raises Not_found contribute nothing.
+ *)
+let make_iso enc =
+  let rec collect code acc =
+    if code > 255 then
+      acc
+    else
+      let u =
+        try Netconversion.makechar (enc :> Netconversion.encoding) code
+        with Not_found -> "" in
+      collect (code + 1) (acc ^ u)
+  in
+  collect 0 ""
+;;
+
+(* [make_ucs2 start stop] returns a string containing the code points
+ * start, start+1, ..., stop-1, each written as two bytes with the high
+ * byte first.
+ *)
+let make_ucs2 start stop =
+  let s = String.create ((stop - start) * 2) in
+  for code = start to stop - 1 do
+    let pos = 2 * (code - start) in
+    String.set s pos (Char.chr (code lsr 8));
+    String.set s (pos + 1) (Char.chr (code land 0xff))
+  done;
+  s
+;;
+
+(* [make_ucs4 start stop] returns a string containing the code points
+ * start, start+1, ..., stop-1, each written as four bytes with the most
+ * significant byte first.
+ *)
+let make_ucs4 start stop =
+  let s = String.create ((stop - start) * 4) in
+  for code = start to stop - 1 do
+    let pos = 4 * (code - start) in
+    String.set s pos (Char.chr (code lsr 24));
+    String.set s (pos + 1) (Char.chr ((code lsr 16) land 0xff));
+    String.set s (pos + 2) (Char.chr ((code lsr 8) land 0xff));
+    String.set s (pos + 3) (Char.chr (code land 0xff))
+  done;
+  s
+;;
+
+(* Maps an encoding tag to the name which is passed to the 'iconv' command
+ * for this encoding.
+ * Note: GNU-iconv assumes big endian byte order
+ *)
+let name_of_encoding = function
+    `Enc_iso88591  -> "ISO_8859-1"
+  | `Enc_iso88592  -> "ISO_8859-2"
+  | `Enc_iso88593  -> "ISO_8859-3"
+  | `Enc_iso88594  -> "ISO_8859-4"
+  | `Enc_iso88595  -> "ISO_8859-5"
+  | `Enc_iso88596  -> "ISO_8859-6"
+  | `Enc_iso88597  -> "ISO_8859-7"
+  | `Enc_iso88598  -> "ISO_8859-8"
+  | `Enc_iso88599  -> "ISO_8859-9"
+  | `Enc_iso885910 -> "ISO_8859-10"
+  | `Enc_iso885913 -> "ISO_8859-13"
+  | `Enc_iso885914 -> "ISO_8859-14"
+  | `Enc_iso885915 -> "ISO_8859-15"
+  | `Enc_utf8      -> "UTF-8"
+  | `Enc_ucs4      -> "UCS-4"
+  | `Enc_ucs2      -> "UCS-2"
+  | `Enc_utf16     -> "UTF-16"
+;;
+
+(* [iconv_recode_string in_enc out_enc in_s] converts the string [in_s]
+ * from [in_enc] to [out_enc] by running the external command 'iconv' and
+ * returns what the command writes to its standard output.
+ * The exit status of the command is ignored.
+ *)
+let iconv_recode_string in_enc out_enc in_s =
+ let in_enc_name = name_of_encoding in_enc in
+ let out_enc_name = name_of_encoding out_enc in
+ (* Accumulates the output of the command: *)
+ let out_s = ref "" in
+
+ (* out_ch is the channel we read the command's output from; in_ch is the
+ * channel we write the command's input to.
+ *)
+ let out_ch,in_ch = Unix.open_process ("iconv -f " ^ in_enc_name ^ " -t " ^
+ out_enc_name) in
+ (* Write in_s to in_ch in a new thread: *)
+ ignore
+ (Thread.create
+ (fun () ->
+ output_string in_ch in_s;
+ close_out in_ch;
+ )
+ ()
+ );
+ (* Read the result in the current thread: *)
+ let buf = String.create 1024 in
+ (* n is the size of the last chunk; 0 means that the end of the output has
+ * been reached. It starts with 1 only to enter the loop.
+ *)
+ let n = ref 1 in
+ while !n <> 0 do
+ let n' = input out_ch buf 0 1024 in
+ out_s := !out_s ^ String.sub buf 0 n';
+ n := n'
+ done;
+ (* Closes the channels and waits for the command; the status is dropped: *)
+ ignore(Unix.close_process (out_ch,in_ch));
+ !out_s
+;;
+
+(* Checks the conversion between the 8 bit encoding [enc] and UTF-8 in
+ * both directions: the results of Netconversion must be the same as the
+ * results of 'iconv', and converting back must give the original string.
+ * A failed check raises Assert_failure.
+ *)
+let test_iso_and_utf8 enc =
+  let name = name_of_encoding enc in
+  print_string ("Recode: " ^ name ^ " and UTF-8... "); flush stdout;
+  let generic_enc = (enc :> Netconversion.encoding) in
+  let original = make_iso enc in
+
+  (* To UTF-8: *)
+  let utf8_native =
+    Netconversion.recode_string generic_enc `Enc_utf8 original in
+  let utf8_iconv = iconv_recode_string enc `Enc_utf8 original in
+  assert(utf8_native = utf8_iconv);
+
+  (* And back again: *)
+  let back_native =
+    Netconversion.recode_string `Enc_utf8 generic_enc utf8_native in
+  let back_iconv = iconv_recode_string `Enc_utf8 enc utf8_native in
+  assert(back_native = back_iconv && back_native = original);
+
+  print_endline "OK"; flush stdout
+;;
+
+(* Checks the conversion between big endian UTF-16 and UTF-8 for the code
+ * points #0000 to #D7FF in both directions, comparing Netconversion with
+ * 'iconv'. A failed check raises Assert_failure.
+ *)
+let test_utf16_and_utf8_0000_d7ff () =
+  print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... ";
+  flush stdout;
+  let utf16 = make_ucs2 0 0xd800 in
+
+  (* To UTF-8: *)
+  let utf8_native =
+    Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+  assert(utf8_native = utf8_iconv);
+
+  (* And back again: *)
+  let back_native =
+    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_native in
+  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_native in
+  assert(back_native = back_iconv && back_native = utf16);
+
+  print_endline "OK"; flush stdout
+;;
+
+(* Checks the conversion between big endian UTF-16 and UTF-8 for the code
+ * points #E000 to #FFFD in both directions, comparing Netconversion with
+ * 'iconv'. A failed check raises Assert_failure.
+ *)
+let test_utf16_and_utf8_e000_fffd () =
+  print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... ";
+  flush stdout;
+  let utf16 = make_ucs2 0xe000 0xfffe in
+
+  (* To UTF-8: *)
+  let utf8_native =
+    Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+  let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+  assert(utf8_native = utf8_iconv);
+
+  (* And back again: *)
+  let back_native =
+    Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_native in
+  let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_native in
+  assert(back_native = back_iconv && back_native = utf16);
+
+  print_endline "OK"; flush stdout
+;;
+
+(* Checks the conversion between big endian UTF-16 and UTF-8 for the code
+ * points #10000 to #10FFFF, in 16 blocks of #10000 code points each.
+ * The UTF-16 input of every block is itself produced by 'iconv' from a
+ * UCS-4 string. A "+" is printed after every block.
+ *)
+let test_utf16_and_utf8_10000_10FFFF () =
+  print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... ";
+  flush stdout;
+  for block = 1 to 16 do
+    let first = block * 0x10000 in
+    let ucs4 = make_ucs4 first (first + 0x10000) in
+    let utf16 = iconv_recode_string `Enc_ucs4 `Enc_utf16 ucs4 in
+
+    (* To UTF-8: *)
+    let utf8_native =
+      Netconversion.recode_string `Enc_utf16_be `Enc_utf8 utf16 in
+    let utf8_iconv = iconv_recode_string `Enc_utf16 `Enc_utf8 utf16 in
+    assert(utf8_native = utf8_iconv);
+
+    (* And back again: *)
+    let back_native =
+      Netconversion.recode_string `Enc_utf8 `Enc_utf16_be utf8_native in
+    let back_iconv = iconv_recode_string `Enc_utf8 `Enc_utf16 utf8_native in
+    assert(back_native = back_iconv && back_native = utf16);
+
+    print_string "+"; flush stdout
+  done;
+  print_endline "OK"; flush stdout
+;;
+
+
+print_endline "Warning: You need the command 'iconv' to run this test!";
+flush stdout;
+(* The tests for `Enc_iso88598, `Enc_iso885913, `Enc_iso885914 and
+ * `Enc_iso885915 are switched off.
+ *)
+List.iter
+  test_iso_and_utf8
+  [ `Enc_iso88591;
+    `Enc_iso88592;
+    `Enc_iso88593;
+    `Enc_iso88594;
+    `Enc_iso88595;
+    `Enc_iso88596;
+    `Enc_iso88597;
+    `Enc_iso88599;
+    `Enc_iso885910 ];
+List.iter
+  (fun run -> run())
+  [ test_utf16_and_utf8_0000_d7ff;
+    test_utf16_and_utf8_e000_fffd ];
+(* This test does not work because iconv does not support the surrogate
+ * representation of UTF-16:
+ * test_utf16_and_utf8_10000_10FFFF();
+ *)
+()
+;;
--- /dev/null
+# Driver Makefile: "all", "CLEAN" and "distclean" run the target of the
+# same name (or the default target) in the subdirectory unimap_to_ocaml.
+
+# Build the default target of the subdirectory.
+all:
+ $(MAKE) -C unimap_to_ocaml
+
+# Nothing is built in this directory itself, so there is nothing to remove.
+clean:
+
+# Clean this directory, then run "CLEAN" in the subdirectory.
+CLEAN: clean
+ $(MAKE) -C unimap_to_ocaml CLEAN
+
+# Clean this directory, then run "distclean" in the subdirectory.
+distclean: clean
+ $(MAKE) -C unimap_to_ocaml distclean
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
+*.o
+*.a
+
--- /dev/null
+# Builds the program unimap_to_ocaml from its single source file.
+all: unimap_to_ocaml
+
+# Bytecode executable, linked with the "str" package via ocamlfind.
+unimap_to_ocaml: unimap_to_ocaml.ml
+ ocamlfind ocamlc -g -package str -linkpkg -custom \
+ -o unimap_to_ocaml \
+ unimap_to_ocaml.ml
+
+# Remove compiler output; the executable itself is kept.
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+# Same as "clean" in this directory.
+CLEAN: clean
+
+# Additionally remove editor backup files and the executable.
+distclean: clean
+ rm -f *~ unimap_to_ocaml
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Printf;;
+
+(* Matches a comment: everything from '#' to the end of the line. *)
+let comment_re = Str.regexp "#.*$";;
+(* Matches a non-empty run of blanks, tabs, CR and LF characters; used to
+ * split a line into words.
+ *)
+let space_re = Str.regexp "[ \t\r\n]+";;
+
+let read_unimap_format_a fname f =
+  (* Reads a Unicode mapping in format A from a "local" code to Unicode.
+   * Returns a list of pairs (localcode, unicode), in the order of the
+   * lines of the file.
+   *
+   * fname: the name of the file; only used in the error message
+   * f:     the input channel the lines are read from, until End_of_file
+   *
+   * Comments ('#' up to the end of the line) are removed first. Lines
+   * which are empty after that are skipped. Every other line must consist
+   * of exactly two words, which are converted with int_of_string.
+   *
+   * Raises Failure if a line has a different number of words, or if
+   * int_of_string rejects a word.
+   *
+   * The lines are collected with an accumulator in a tail-recursive loop,
+   * and the exception handler covers only the call of input_line, so
+   * neither the stack nor the number of active handlers grows with the
+   * length of the file.
+   *)
+
+  let parse_line s =
+    (* None: nothing to read in this line; Some pair: a mapping *)
+    let s' = Str.global_replace comment_re "" s in
+    match Str.split space_re s' with
+      [] ->
+        None
+    | [ localcode; unicode ] ->
+        Some (int_of_string localcode, int_of_string unicode)
+    | _ ->
+        failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s')
+  in
+
+  let rec read_following_lines acc =
+    (* acc: the pairs found so far, in reverse order *)
+    let next_line =
+      try Some (input_line f) with End_of_file -> None in
+    match next_line with
+      None ->
+        List.rev acc
+    | Some s ->
+        begin match parse_line s with
+          None -> read_following_lines acc
+        | Some pair -> read_following_lines (pair :: acc)
+        end
+  in
+
+  read_following_lines []
+;;
+
+
+(* The contents of one bucket of the table from Unicode to the local code.
+ * Every pair is (unicode, localcode).
+ *)
+type from_uni_list =
+ (* the bucket is empty *)
+ U_nil
+ (* the bucket contains exactly one pair *)
+ | U_single of (int * int)
+ (* the bucket contains several pairs *)
+ | U_list of (int * int) list
+
+type from_unicode =
+ from_uni_list array;;
+ (* A hashtable with fixed size (256). A pair (unicode, localcode) is
+ * stored at the position unicode mod 256 in the array.
+ *)
+
+
+let make_bijection unimap =
+  (* unimap: a list of pairs (localcode, unicode)
+   * returns a pair of arrays (m_to_unicode, m_from_unicode), both of
+   * length 256, with:
+   * - m_to_unicode.(localcode) = Some unicode,
+   *     if the pair (localcode, unicode) exists
+   *   m_to_unicode.(x) = None otherwise
+   * - m_from_unicode.(unicode land 255) = [ ...; (unicode,localcode); ... ]
+   *     i.e. the bucket is selected by the low 8 bits of the Unicode code
+   *     point; the pairs of a bucket are in the order of unimap
+   *
+   * Raises Failure if a local code point or a Unicode code point occurs
+   * more than once; the assertion fails if a local code point is outside
+   * the range 0..255.
+   *)
+
+  let m_to_unicode = Array.make 256 None in
+  let m_from_unicode = Array.make 256 [] in
+
+  List.iter
+    (fun (localcode, unicode) ->
+       (* The local code is used as array index below: *)
+       assert(localcode >= 0 && localcode < 256);
+
+       (* Update m_to_unicode: *)
+       if m_to_unicode.(localcode) <> None then
+         failwith ("Local code point " ^ string_of_int localcode ^
+                   " mapped twice");
+       m_to_unicode.(localcode) <- Some unicode;
+
+       (* Update m_from_unicode: *)
+       let unilow = unicode land 255 in
+       if List.mem_assoc unicode (m_from_unicode.(unilow)) then
+         failwith ("Unicode code point " ^ string_of_int unicode ^
+                   " mapped twice");
+       m_from_unicode.(unilow) <-
+         m_from_unicode.(unilow) @ [unicode,localcode];
+    )
+    unimap;
+
+  m_to_unicode, m_from_unicode
+;;
+
+
+(* Returns the marshalled form (without sharing) of an int array which has
+ * the same length as [to_unimap]: Some u is represented by u, and None by
+ * -1.
+ *)
+let to_unimap_as_string to_unimap =
+  let codes =
+    Array.map
+      (function
+           Some u -> u
+         | None -> -1)
+      to_unimap in
+  Marshal.to_string codes [ Marshal.No_sharing ]
+;;
+
+
+let from_unimap_as_string from_unimap =
+  (* Marshals the Unicode-to-local-code table. Every bucket (a list of
+   * pairs) is first turned into the compact from_uni_list form:
+   * empty -> U_nil, one pair -> U_single, anything longer -> U_list.
+   * Sharing is disabled.
+   *)
+  let compact bucket =
+    match bucket with
+      [] -> U_nil
+    | [ (ucode, lcode) ] -> U_single (ucode, lcode)
+    | _ :: _ :: _ -> U_list bucket
+  in
+  Marshal.to_string (Array.map compact from_unimap) [ Marshal.No_sharing ]
+;;
+
+
+let print_bijection f name m_to_unicode m_from_unicode =
+  (* Writes two O'Caml definitions to the channel f:
+   *   let <name>_to_unicode = ...
+   *   let <name>_from_unicode = ...
+   * Each one is a lazy value that unmarshals the corresponding table
+   * from an escaped string literal.
+   *)
+  let to_literal = String.escaped (to_unimap_as_string m_to_unicode) in
+  fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n"
+    name to_literal;
+
+  let from_literal = String.escaped (from_unimap_as_string m_from_unicode) in
+  fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n "
+    name from_literal
+;;
+
+
+let main() =
+  (* Entry point of the generator.
+   *
+   * Command line: unimap_to_ocaml [-o <file>] file.unimap ...
+   *
+   * Every input file is read with read_unimap_format_a and turned into a
+   * pair of tables by make_bijection; the name of the mapping is the
+   * basename of the file without the suffix .unimap. The output consists
+   * of the definitions printed by print_bijection, followed by statements
+   * that register the tables in Netmappings.to_unicode and
+   * Netmappings.from_unicode under the tag `Enc_<mapname>.
+   *
+   * The output channel is lazy: the file given with -o is only opened
+   * (and thereby truncated) after all inputs have been read and processed
+   * successfully.
+   *)
+  let files = ref [] in
+  let outch = ref (lazy stdout) in
+  Arg.parse
+    [ "-o", Arg.String (fun s -> outch := lazy (open_out s)),
+      " <file> Write result to this file"]
+    (fun s -> files := !files @ [s])
+    "usage: unimap_to_ocaml file.unimap ...";
+
+  (* The backslash must be doubled inside the string literal; a single
+   * backslash before the dot is an illegal escape sequence that only
+   * works because the lexer tolerates it (with a warning). The regexp is
+   * compiled once for all files.
+   *)
+  let suffix_re = Str.regexp "\\.unimap$" in
+
+  (* First read in all unimaps: *)
+  let unimaps =
+    List.map
+      (fun filename ->
+         let mapname =
+           Str.replace_first suffix_re "" (Filename.basename filename) in
+         let f = open_in filename in
+         prerr_endline ("Reading " ^ filename);
+         let unimap =
+           try read_unimap_format_a filename f
+           with e ->
+             (* Do not leak the channel if the file is malformed. *)
+             begin close_in f; raise e end
+         in
+         close_in f;
+         mapname, unimap
+      )
+      !files
+  in
+
+  (* Second compute all bijections: *)
+  let bijections =
+    List.map
+      (fun (mapname, unimap) ->
+         prerr_endline ("Processing " ^ mapname);
+         let to_unicode, from_unicode = make_bijection unimap in
+         mapname, to_unicode, from_unicode
+      )
+      unimaps
+  in
+
+  let out = Lazy.force !outch in
+  (* Third output all results: *)
+  output_string out "(* WARNING! This is a generated file! *)\n";
+
+  List.iter
+    (fun (mapname, to_unicode, from_unicode) ->
+       print_bijection out mapname to_unicode from_unicode)
+    bijections;
+  (* The registrations are emitted in reverse order of the command line. *)
+  List.iter
+    (fun (mapname, _, _) ->
+       fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n"
+         mapname mapname;
+       fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n"
+         mapname mapname;
+    )
+    (List.rev bijections);
+  fprintf out "();;\n";
+
+  close_out out
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/29 00:48:52 gerd
+ * Conversion tables are now stored in marshalled form.
+ * New type for the conversion table Unicode to 8bit.
+ *
+ * Revision 1.2 2000/08/12 23:54:56 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
--- /dev/null
+Copyright 1999 by Gerd Stolpmann
+
+The package "markup" is copyright by Gerd Stolpmann.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "markup" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
--- /dev/null
+version = "1.0"
+requires = "netstring"
+description = "Validating parser for XML-1.0"
+archive(byte) = "pxp_types.cma
+ pxp_lex_iso88591.cma
+ pxp_lex_utf8.cma
+ pxp_engine.cma
+ pxp_utf8.cmo"
+archive(byte, pxp_without_utf8) = "pxp_types.cma
+ pxp_lex_iso88591.cma
+ pxp_engine.cma"
+archive(native) = "pxp_types.cmxa
+ pxp_lex_iso88591.cmxa
+ pxp_lex_utf8.cmxa
+ pxp_engine.cmxa
+ pxp_utf8.cmx"
+archive(native, pxp_without_utf8) = "pxp_types.cmxa
+ pxp_lex_iso88591.cmxa
+ pxp_engine.cmxa"
+
--- /dev/null
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all:
+ $(MAKE) -C m2parsergen all
+ $(MAKE) -C tools/ucs2_to_utf8 all
+ $(MAKE) -f Makefile.code all
+ $(MAKE) -C compatibility all
+
+.PHONY: opt
+opt:
+ $(MAKE) -C m2parsergen all
+ $(MAKE) -C tools/ucs2_to_utf8 all
+ $(MAKE) -f Makefile.code opt
+ $(MAKE) -C compatibility opt
+
+.PHONY: install
+install: all tmp/pxp_entity.mli
+ files=`tools/collect_files *.cmi *.cma *.cmxa *.a \
+ pxp_utf8.cmo pxp_utf8.cmx pxp_utf8.o` && \
+ ocamlfind install $(NAME) $(MLI) tmp/pxp_entity.mli $$files META
+
+.PHONY: uninstall
+uninstall:
+ ocamlfind remove $(NAME)
+
+.PHONY: markup-install
+markup-install:
+ $(MAKE) -C compatibility install
+
+.PHONY: markup-uninstall
+markup-uninstall:
+ $(MAKE) -C compatibility uninstall
+
+tmp/pxp_entity.mli: pxp_entity.ml
+ mkdir -p tmp
+ rm -f tmp/pxp_entity.*
+ cp pxp_entity.ml tmp
+ echo '(* Sorry, this is currently undocumented *)' >tmp/mli
+ ocamlc -i -c tmp/pxp_entity.ml >>tmp/mli
+ mv tmp/mli tmp/pxp_entity.mli
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+ rm -f pxp_yacc.ml
+ touch lexers/objects_iso88591 lexers/objects_utf8 lexers/depend
+ $(MAKE) -C lexers clean
+ $(MAKE) -C compatibility clean
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C doc CLEAN
+ $(MAKE) -C examples CLEAN
+ $(MAKE) -C rtests CLEAN
+ $(MAKE) -C m2parsergen CLEAN
+ touch tools/ucs2_to_utf8/depend
+ $(MAKE) -C tools/ucs2_to_utf8 clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ $(MAKE) -C doc distclean
+ $(MAKE) -C examples distclean
+ $(MAKE) -C rtests distclean
+ $(MAKE) -C m2parsergen distclean
+ touch tools/ucs2_to_utf8/depend
+ $(MAKE) -C tools/ucs2_to_utf8 clean
+ $(MAKE) -C compatibility distclean
+
+RELEASE: META
+ awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE
+
+.PHONY: dist
+dist: RELEASE
+ r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*~" --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/oo_questions*" --exclude="*/testsamples*" --exclude="*/tmp/*" --exclude="*reptil*" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*.fig.bak" --exclude="*/ps/pic*" --exclude="*/examples/panel*" --exclude="*/examples/xmlforms_gtk*" --exclude="*/Mail*" $(NAME)/*
+
+.PHONY: tag-release
+tag-release: RELEASE
+ r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r markup
+
+.PHONY: release
+release: distclean
+ $(MAKE) tag-release
+ $(MAKE) dist
+
+.PHONY: dev
+dev:
+ $(MAKE) all
+ -$(MAKE) uninstall
+ $(MAKE) install
+ $(MAKE) -C examples/validate distclean
+ $(MAKE) -C examples/validate validate
--- /dev/null
+# make all: make bytecode archives
+# make opt: make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+all:
+ $(MAKE) -f Makefile.code pxp_types.cma
+ $(MAKE) -f Makefile.code pxp_lex_iso88591.cma
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cma; else rm -f pxp_lex_utf8.cma; fi
+ $(MAKE) -f Makefile.code pxp_engine.cma
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmo; else rm -f pxp_utf8.cmo; fi
+
+opt:
+ $(MAKE) -f Makefile.code pxp_types.cmxa
+ $(MAKE) -f Makefile.code pxp_lex_iso88591.cmxa
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cmxa; else rm -f pxp_lex_utf8.cmxa; fi
+ $(MAKE) -f Makefile.code pxp_engine.cmxa
+ if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmx; else rm -f pxp_utf8.cmx; fi
+
+#----------------------------------------------------------------------
+
+pxp_types.cma: $(OBJECTS_types)
+ $(OCAMLC) -a -o pxp_types.cma $(OBJECTS_types)
+
+pxp_types.cmxa: $(XOBJECTS_types)
+ $(OCAMLOPT) -a -o pxp_types.cmxa $(XOBJECTS_types)
+
+pxp_engine.cma: $(OBJECTS_engine)
+ $(OCAMLC) -a -o pxp_engine.cma $(OBJECTS_engine)
+
+pxp_engine.cmxa: $(XOBJECTS_engine)
+ $(OCAMLOPT) -a -o pxp_engine.cmxa $(XOBJECTS_engine)
+
+
+# The following rules are "phony" to force 'make' to go into the
+# "lexers" subdirectory.
+
+.PHONY: pxp_lex_iso88591.cma
+pxp_lex_iso88591.cma: $(CMI_types)
+ $(MAKE) -C lexers all_iso88591
+ cp lexers/pxp_lex_iso88591.cma .
+
+.PHONY: pxp_lex_iso88591.cmxa
+pxp_lex_iso88591.cmxa: $(CMI_types)
+ $(MAKE) -C lexers opt_iso88591
+ cp lexers/pxp_lex_iso88591.cmxa lexers/pxp_lex_iso88591.a .
+
+.PHONY: pxp_lex_utf8.cma
+pxp_lex_utf8.cma: $(CMI_types)
+ $(MAKE) -C lexers all_utf8
+ cp lexers/pxp_lex_utf8.cma .
+
+.PHONY: pxp_lex_utf8.cmxa
+pxp_lex_utf8.cmxa: $(CMI_types)
+ $(MAKE) -C lexers opt_utf8
+ cp lexers/pxp_lex_utf8.cmxa lexers/pxp_lex_utf8.a .
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = $(OCAMLFIND) ocamlc -package "$(PACKAGES)" \
+ -g -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = $(OCAMLFIND) ocamlopt -package "$(PACKAGES)" \
+ -p -I lexers $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli pxp_yacc.ml
+ $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .m2y
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+.m2y.ml:
+ ./m2parsergen/m2parsergen < $< >`basename $< .m2y`.ml || { rm -f `basename $< .m2y`.ml; false; }
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
--- /dev/null
+# User-configurable section:
+
+# yes or no: Should the parser support UTF-8 strings as internal
+# representation? "yes" is recommended, but it makes the parser
+# much bigger.
+UTF8_SUPPORT = yes
+
+# --- End of User-configurable section.
+
+# Settings.
+
+NAME = pxp
+PACKAGES = netstring
+
+# Caml objects that are needed by the lexers:
+OBJECTS_types = \
+ pxp_types.cmo pxp_lexer_types.cmo
+
+CMI_types = $(OBJECTS_types:.cmo=.cmi)
+
+# Caml objects that depend on the lexers:
+OBJECTS_engine = \
+ pxp_lexers.cmo \
+ pxp_dfa.cmo \
+ pxp_aux.cmo pxp_reader.cmo \
+ pxp_entity.cmo pxp_dtd.cmo pxp_document.cmo \
+ pxp_yacc.cmo pxp_codewriter.cmo
+
+# Same as native objects:
+XOBJECTS_types = $(OBJECTS_types:.cmo=.cmx)
+XOBJECTS_engine = $(OBJECTS_engine:.cmo=.cmx)
+
+# .mli files to install:
+
+MLI = pxp_document.mli pxp_dtd.mli \
+ pxp_types.mli pxp_yacc.mli \
+ pxp_codewriter.mli pxp_dfa.mli
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
--- /dev/null
+version = "PXP-emulator"
+requires = "pxp"
+description = "Validating parser for XML-1.0"
+archive(byte) = "markup.cma"
+archive(native) = "markup.cmxa"
+
--- /dev/null
+# make all: make bytecode archive
+# make opt: make native archive
+# make install: install bytecode archive, and if present, native archive
+# make uninstall: uninstall package
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all:
+ $(MAKE) -f Makefile.code all
+
+.PHONY: opt
+opt:
+ $(MAKE) -f Makefile.code opt
+
+.PHONY: install
+install: all
+ files=`../tools/collect_files *.cmi *.cma *.cmxa *.a` && \
+ ocamlfind install $(NAME) $(MLI) $$files META
+
+.PHONY: uninstall
+uninstall:
+ ocamlfind remove $(NAME)
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+
--- /dev/null
+# make all: make bytecode archives
+# make opt: make native archives
+#----------------------------------------------------------------------
+
+include Makefile.conf
+
+.PHONY: all
+all: markup.cma
+
+.PHONY: opt
+opt: markup.cmxa
+
+#----------------------------------------------------------------------
+
+markup.cma: $(OBJECTS)
+ $(OCAMLC) -a -o markup.cma $(OBJECTS)
+
+markup.cmxa: $(XOBJECTS)
+ $(OCAMLOPT) -a -o markup.cmxa $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+*.mli:
+
+
+# Generated dependencies:
+
+include depend
+
--- /dev/null
+NAME = markup
+
+OBJECTS = markup_types.cmo markup_dtd.cmo markup_reader.cmo \
+ markup_document.cmo markup_yacc.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+
+MLI = markup_document.mli markup_dtd.mli \
+ markup_types.mli markup_yacc.mli markup_reader.mli
+
--- /dev/null
+This directory contains the modules for Markup-0.2.10
+compatibility. The modules consist mainly of wrapper classes for the
+new PXP classes, and translate the old methods to the new ones.
+
+Please note that the compatibility is not perfect. Sometimes there are
+new methods which do not exist in Markup-0.2.10, and sometimes even
+existing methods changed their signature. I have tried to avoid that,
+but there are some ugly cases which are hard to solve without such
+modifications.
+
+Translating old methods into new methods costs time and
+memory. Because of this, it is best to consider the compatibility
+modules as a migration path to PXP: You can test whether PXP parses your
+input files, and you can compare the old API with the new API
+directly. (However, it is hard to test new features of PXP with the
+compatibility modules; the old API does not reflect the new features.)
+
+The compatibility modules are currently maintained, but that will stop
+once PXP has been established.
+
+(Gerd)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+type node_type = (* Node kinds of the old Markup API. The method
+ * node_type of emulate_markup_node below maps the PXP node kinds
+ * onto these two constructors.
+ *)
+ T_element of string
+ | T_data
+
+class type [ 'node ] extension = [ 'node ] Pxp_document.extension (* alias of the PXP extension interface *)
+
+class type [ 'ext, 'node ] pxp_extension_type = (* Interface of the
+ * extension objects that this emulation attaches to the underlying PXP
+ * nodes. In addition to clone/node/set_node it stores a reference to the
+ * Markup-style wrapper node and an index. The parameter 'ext does not
+ * occur in any of the method types below.
+ *)
+object ('self)
+ method clone : 'self
+ method node : 'self Pxp_document.node
+ method set_node : 'self Pxp_document.node -> unit
+
+ method markup_node : 'node (* the wrapper node belonging to the PXP node *)
+ method set_markup_node : 'node -> unit
+
+ method set_index : 'self Pxp_yacc.index -> unit
+ method index : 'self Pxp_yacc.index (* consulted by the find method of the wrapper *)
+ end
+;;
+
+
+class type [ 'ext ] node = (* The node interface of the old Markup API as
+ * offered by this emulation. Compared with a plain list of the old
+ * methods there is one addition: pxp_node, which gives access to the
+ * PXP node that the wrapper delegates to.
+ *)
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+
+ method extension : 'ext
+ method delete : unit
+ method parent : 'ext node
+ method root : 'ext node
+ method orphaned_clone : 'ext node
+ method orphaned_flat_clone : 'ext node
+ method add_node : 'ext node -> unit
+ method add_pinstr : Markup_dtd.proc_instruction -> unit
+ method pinstr : string -> Markup_dtd.proc_instruction list
+ method pinstr_names : string list
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method set_nodes : 'ext node list -> unit
+ method data : string
+ method node_type : node_type
+ method attribute : string -> Markup_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Markup_types.att_type
+ method attributes : (string * Markup_types.att_value) list
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+ method find : string -> 'ext node
+ method reset_finder : unit
+ method dtd : Markup_dtd.dtd
+ method create_element :
+ Markup_dtd.dtd -> node_type -> (string * string) list -> 'ext node
+ method create_data : Markup_dtd.dtd -> string -> 'ext node
+ method local_validate : unit
+ method keep_always_whitespace_mode : unit
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ method internal_adopt : 'ext node option -> unit (* the three internal_* methods are not supported by emulate_markup_node *)
+ method internal_delete : 'ext node -> unit
+ method internal_init : Markup_dtd.dtd -> string -> (string * string) list -> unit
+ end
+;;
+
+
+class [ 'ext ] pxp_extension init_markup_node = (* The extension object
+ * that is attached to every PXP node created by this emulation.
+ * init_markup_node is the Markup-style wrapper node this extension
+ * initially refers to. The PXP node and the index are unset after
+ * construction and must be provided with set_node and set_index.
+ *)
+ (object (self : 'self)
+ (* constraint 'ext = 'ext node #extension *)
+ val mutable pxp_node = (None :
+ 'self Pxp_document.node option)
+ (* 'ext pxp_extension Pxp_document.node option *)
+ val mutable markup_node = (init_markup_node : 'ext node)
+
+ val mutable index = (None : 'self Pxp_yacc.index option)
+
+ method clone = (* copy of this object; the copy refers to the same markup_node, PXP node and index *)
+ {< >}
+
+ method node = (* the PXP node stored by set_node; assert false if set_node was never called *)
+ match pxp_node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ pxp_node <- Some n
+
+ method markup_node = markup_node
+
+ method set_markup_node n = markup_node <- n
+
+ method set_index ix =
+ index <- Some ix
+
+ method index = (* the index stored by set_index; assert false if set_index was never called *)
+ match index with
+ None -> assert false
+ | Some x -> x
+
+ end
+ : ['ext, 'ext node] pxp_extension_type )
+;;
+
+
+class [ 'ext ] emulate_markup_node init_ext init_pxp_node = (* Wrapper that
+ * offers the old Markup node interface on top of a PXP node. init_ext is
+ * the extension object of the wrapper; init_pxp_node is the wrapped PXP
+ * node as an option. The subclasses data_impl and element_impl below pass
+ * None and set the PXP node in their initializers.
+ * Most methods simply delegate to the PXP node; where PXP returns PXP
+ * nodes, they are mapped back to wrappers with extension # markup_node.
+ *)
+ object (self)
+ constraint 'ext = 'ext node #extension
+ val mutable pxp_node = (init_pxp_node :
+ ('ext, 'ext #node)
+ pxp_extension_type Pxp_document.node option)
+ val mutable extension = (init_ext : 'ext)
+
+ method pxp_node = (* the wrapped PXP node; assert false while it is still None *)
+ match pxp_node with
+ None -> assert false
+ | Some n -> n
+
+ method extension = extension
+ method delete = self # pxp_node # delete
+ method parent = self # pxp_node # parent # extension # markup_node
+ method root = self # pxp_node # root # extension # markup_node
+
+ method orphaned_clone = (* Clones the wrapper extension and the PXP
+ * node, creates a new wrapper for the pair and links all three.
+ * NOTE(review): only the top node of the cloned PXP tree gets
+ * set_markup_node here. Whether the extensions of cloned sub nodes
+ * still refer to the wrappers of the original tree depends on
+ * orphaned_clone of Pxp_document -- to be confirmed.
+ *)
+ let ext' = extension # clone in
+ let pxp' = self # pxp_node # orphaned_clone in
+ let n = new emulate_markup_node ext' (Some pxp') in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ pxp' # extension # set_markup_node n;
+ n
+
+ method orphaned_flat_clone = (* same linking as orphaned_clone, based on orphaned_flat_clone of the PXP node *)
+ let ext' = extension # clone in
+ let pxp' = self # pxp_node # orphaned_flat_clone in
+ let n = new emulate_markup_node ext' (Some pxp') in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ pxp' # extension # set_markup_node n;
+ n
+
+ method dtd = self # pxp_node # dtd
+
+ method add_node (n : 'ext node) = (* adds the PXP node wrapped by n to the PXP node of this wrapper *)
+ let n_pxp = n # pxp_node in
+ self # pxp_node # add_node n_pxp
+
+ method add_pinstr pi =
+ self # pxp_node # add_pinstr pi
+
+ method sub_nodes = (* the wrappers of the PXP sub nodes, in the order PXP returns them *)
+ let l = self # pxp_node # sub_nodes in
+ List.map (fun n_pxp -> n_pxp # extension # markup_node) l
+
+ method pinstr name =
+ self # pxp_node # pinstr name
+
+ method pinstr_names =
+ self # pxp_node # pinstr_names
+
+ method iter_nodes f =
+ self # pxp_node # iter_nodes
+ (fun n_pxp -> f (n_pxp # extension # markup_node))
+
+ method iter_nodes_sibl f = (* like iter_nodes, but f also gets the wrappers of the left and right neighbour, if any *)
+ self # pxp_node # iter_nodes_sibl
+ (fun left_pxp node_pxp right_pxp ->
+ let left =
+ match left_pxp with
+ None -> None
+ | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+ let right =
+ match right_pxp with
+ None -> None
+ | Some n_pxp -> Some (n_pxp # extension # markup_node) in
+ let node =
+ node_pxp # extension # markup_node in
+ f left node right
+ )
+
+ method set_nodes (l : 'ext node list) =
+ let l_pxp = List.map (fun n -> n # pxp_node) l in
+ self # pxp_node # set_nodes l_pxp
+
+ method data = self # pxp_node # data
+
+ method node_type = (* Translates the PXP node type. The super root and
+ * processing instruction nodes, which the old type cannot express, are
+ * reported as elements with the reserved names -vr and -pi. Any other
+ * PXP node type ends in assert false.
+ *)
+ match self # pxp_node # node_type with
+ Pxp_document.T_data -> T_data
+ | Pxp_document.T_element name -> T_element name
+ | Pxp_document.T_super_root -> T_element "-vr"
+ | Pxp_document.T_pinstr _ -> T_element "-pi"
+ | _ -> assert false
+
+ method attribute name =
+ self # pxp_node # attribute name
+
+ method attribute_names =
+ self # pxp_node # attribute_names
+
+ method attribute_type name =
+ self # pxp_node # attribute_type name
+
+ method attributes =
+ self # pxp_node # attributes
+
+ method required_string_attribute name =
+ self # pxp_node # required_string_attribute name
+
+ method required_list_attribute name =
+ self # pxp_node # required_list_attribute name
+
+ method optional_string_attribute name =
+ self # pxp_node # optional_string_attribute name
+
+ method optional_list_attribute name =
+ self # pxp_node # optional_list_attribute name
+
+ method quick_set_attributes l =
+ self # pxp_node # quick_set_attributes l
+
+ method find (name : string) = (* Looks name up in the index stored in
+ * the PXP extension of the root node. That index must have been set
+ * with set_index before; otherwise the index method asserts false.
+ *)
+ let index = self # root # pxp_node # extension # index in
+ let n = index # find name in (* may raise Not_found *)
+ n # extension # markup_node
+
+ method reset_finder = () (* nothing to do in this emulation *)
+
+ method create_element dtd nt atts = (* Translates the node type to PXP,
+ * lets the PXP node create the new element, and wraps the result in a
+ * new emulate_markup_node with a clone of this extension.
+ *)
+ let nt_pxp =
+ match nt with
+ T_data -> Pxp_document.T_data
+ | T_element name -> Pxp_document.T_element name in
+ let node_pxp =
+ self # pxp_node # create_element dtd nt_pxp atts in
+ let ext' = extension # clone in
+ let n = new emulate_markup_node ext' (Some node_pxp) in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ node_pxp # extension # set_markup_node n;
+ n
+
+ method create_data dtd s = (* as create_element, for data nodes *)
+ let node_pxp =
+ self # pxp_node # create_data dtd s in
+ let ext' = extension # clone in
+ let n = new emulate_markup_node ext' (Some node_pxp) in
+ ext' # set_node (n : 'ext #node :> 'ext node);
+ node_pxp # extension # set_markup_node n;
+ n
+
+ method keep_always_whitespace_mode =
+ self # pxp_node # keep_always_whitespace_mode
+
+ method write_compact_as_latin1 out =
+ self # pxp_node # write_compact_as_latin1 out
+
+ method local_validate =
+ self # pxp_node # local_validate()
+
+ method internal_adopt (p:'ext node option) = (* not supported by the emulation: always assert false *)
+ assert false;
+ ()
+
+ method internal_delete (n:'ext node) = (* not supported by the emulation: always assert false *)
+ assert false;
+ ()
+
+ method internal_init (d:Markup_dtd.dtd) (s:string) (atts:(string*string)list) = (* not supported by the emulation: always assert false *)
+ assert false;
+ ()
+ end
+;;
+
+class [ 'ext ] data_impl ext data = (* Exemplar class for data nodes. ext
+ * is the extension of the wrapper. data must be the empty string;
+ * any other value makes the initializer raise Failure.
+ *)
+ object (self)
+ inherit [ 'ext ] emulate_markup_node ext None
+ constraint 'ext = 'ext node #extension
+ initializer
+ if data <> "" then
+ failwith "Emulation of Markup_document: Cannot instantiate data node with non-empty string";
+ let self' = (self : 'ext #node :> 'ext node ) in
+ pxp_node <- Some (new Pxp_document.data_impl (new pxp_extension self')) (* the PXP node is created here, with an extension pointing back to this wrapper *)
+
+ end
+;;
+
+class [ 'ext ] element_impl ext = (* Exemplar class for element nodes. ext
+ * is the extension of the wrapper. The wrapped PXP element is created
+ * in the initializer, with an extension pointing back to this wrapper.
+ *)
+ object (self)
+ inherit [ 'ext ] emulate_markup_node ext None
+ initializer
+ let self' = (self : 'ext #node :> 'ext node ) in
+ pxp_node <- Some (new Pxp_document.element_impl (new pxp_extension self'))
+ end
+;;
+
+
+class [ 'ext ] document w = (* Emulation of the old document class on top
+ * of Pxp_document.document. w is the warning collector; it is passed
+ * to the PXP document after a coercion to the PXP warning type.
+ *)
+ object (self)
+ val pxp_doc = new Pxp_document.document
+ (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings)
+
+ val mutable standalone_flag = false
+
+ method init_xml_version v =
+ pxp_doc # init_xml_version v
+
+ method xml_version =
+ pxp_doc # xml_version
+
+ method init_xml_standalone b = (* only records the flag in this object; init_root passes it on to the DTD *)
+ standalone_flag <- b
+
+ method xml_standalone = standalone_flag
+
+ method init_root (r : 'ext node) = (* installs the PXP node wrapped by r as root, then copies the recorded standalone flag into the DTD *)
+ pxp_doc # init_root (r # pxp_node);
+ self # dtd # set_standalone_declaration standalone_flag
+ (* questionable *)
+
+ method root = (* the wrapper of the root node of the PXP document *)
+ let pxp_root = pxp_doc # root in
+ pxp_root # extension # markup_node
+
+ method dtd =
+ pxp_doc # dtd
+
+ method add_pinstr pi =
+ pxp_doc # add_pinstr pi
+
+ method pinstr name =
+ pxp_doc # pinstr name
+
+ method pinstr_names =
+ pxp_doc # pinstr_names
+
+ method write_compact_as_latin1 out =
+ pxp_doc # write_compact_as_latin1 out
+
+ end
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/18 20:19:00 gerd
+ * Changed the emulation: there are now wrapper objects for nodes.
+ * This was necessary because node_type changed in PXP such that it became
+ * incompatible with Markup's node_type.
+ *
+ * Revision 1.5 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.4 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.3 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.2 2000/05/30 00:08:40 gerd
+ * Bugfix.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_document.mli.
+ * It corresponds to revision 1.13 of markup_document.mli.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Markup_document: *)
+(* Object model of the document/element instances *)
+(* *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ * the element tree. Nodes are either
+ * elements (inner nodes) or data nodes
+ * (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ * extensions of the nodes: Nodes can be
+ * customized by applying a class parameter
+ * that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ * properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operation and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), nodes no longer occurring
+ * in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ * { A --> a0, B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ * object
+ * (* minimal properties required by class type "extension": *)
+ * method clone = ...
+ * method node = ...
+ * method set_node n = ...
+ * (* here my own methods: *)
+ * method do_this_and_that ...
+ * end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ * This is the type of an extension that belongs to a node that
+ * has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ * This is the type of a node that has an extension that belongs to a
+ * node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Markup_dtd
+
+
+type node_type = (* the two kinds of nodes of the element tree *)
+ T_element of string (* an element (inner node); the string is the argument given for elements, e.g. to create_element *)
+ | T_data (* a data node (leaf) *)
+
+
+
+class type [ 'node ] extension = (* the minimal properties every node extension must have *)
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what has previously been set by
+ * "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+;;
+
+class type [ 'ext, 'node ] pxp_extension_type = (* Interface of the
+ * extension objects carried by the PXP nodes that appear in the type of
+ * the pxp_node method of class type node below. It is not part of the
+ * interface described in the overview at the top of this file.
+ *)
+object ('self)
+ method clone : 'self
+ method node : 'self Pxp_document.node
+ method set_node : 'self Pxp_document.node -> unit
+
+ method markup_node : 'node (* the node of type 'node associated with the PXP node *)
+ method set_markup_node : 'node -> unit
+
+ method set_index : 'self Pxp_yacc.index -> unit
+ method index : 'self Pxp_yacc.index
+ end
+;;
+
+(* The type of the nodes of the document tree. 'ext is the type of the
+ * extension objects attached to the nodes; it must be an extension for
+ * nodes of this very type (see the constraint).
+ *)
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node
+ (* Return a PXP node object; presumably the PXP node this emulated
+ * node is built on -- TODO confirm against the implementation.
+ *)
+
+ method extension : 'ext
+ (* Return the extension of this node. *)
+
+ method delete : unit
+ (* Delete this node from the parent's list of sub nodes. This node gets
+ * orphaned.
+ * 'delete' does nothing if this node does not have a parent.
+ *)
+
+ method parent : 'ext node
+ (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+ method root : 'ext node
+ (* Get the direct or indirect parent that does not have a parent itself,
+ * i.e. the root of the tree.
+ *)
+
+ method orphaned_clone : 'ext node
+ (* Return an exact clone of this element and all sub nodes (deep copy),
+ * except string values, which are shared by this node and the clone.
+ * The other exception is that the clone has no parent (i.e. it is now
+ * a root).
+ *)
+
+ method orphaned_flat_clone : 'ext node
+ (* Return a clone of this element where all sub nodes are omitted.
+ * The type of the node and the attributes are the same as in the
+ * original node.
+ * The clone has no parent.
+ *)
+
+ method add_node : 'ext node -> unit
+ (* Append new sub nodes -- mainly used by the parser itself, but
+ * of course open for everybody. If an element is added, it must be
+ * an orphan (i.e. does not have a parent node); and after addition
+ * *this* node is the new parent.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Add a processing instruction to the set of processing instructions of
+ * this node. Usually only elements contain processing instructions.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Get all processing instructions with the passed name. *)
+
+ method pinstr_names : string list
+ (* Get a list of all names of processing instructions. *)
+
+ method sub_nodes : 'ext node list
+ (* Get the list of sub nodes. *)
+
+ method iter_nodes : ('ext node -> unit) -> unit
+ (* Iterate over the sub nodes. *)
+
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ (* Here every iteration step can also access the previous and the
+ * following node, if present (first and third argument of the callback).
+ *)
+
+ method find : string -> 'ext node
+ (* Get the node that has an ID attribute with this value, or raise
+ * Not_found.
+ * "find" may also cause a Validation_error if something is wrong
+ * with the IDs.
+ *)
+
+ method reset_finder : unit
+ (* Ensures that newly added nodes will also be found by "find". *)
+
+ method set_nodes : 'ext node list -> unit
+ (* Set the list of sub nodes. Elements that are no longer sub nodes get
+ * orphaned, and all new elements that previously were not sub nodes
+ * must have been orphaned.
+ *)
+
+ method data : string
+ (* Get the data string of this node. For data nodes, this string is just
+ * the content. For elements, this string is the concatenation of all
+ * subordinate data nodes.
+ *)
+
+ method node_type : node_type
+ (* Get the type of this node: T_element with the name of the element
+ * type, or T_data.
+ *)
+
+ method attribute : string -> Markup_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Markup_types.att_type
+ method attributes : (string * Markup_types.att_value) list
+ (* Get a specific attribute; get the names of all attributes; get the
+ * type of a specific attribute; get names and values of all attributes.
+ * Only elements have attributes.
+ * Note: If the DTD allows arbitrary attributes for this element,
+ * "attribute_type" raises Undeclared.
+ *)
+
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ (* Return the attribute or fail if the attribute is not present:
+ * The first version always passes the value back as string;
+ * the second version always as list.
+ *)
+
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ (* Return some attribute value, or indicate that the attribute is not
+ * present:
+ * The first version always passes the value back as string, and
+ * returns None for a missing attribute;
+ * the second version always passes a list back.
+ * NOTE(review): the list version has no option in its result type, so
+ * a missing attribute is presumably reported as the empty list --
+ * confirm against the implementation.
+ *)
+
+ method quick_set_attributes : (string * Markup_types.att_value) list -> unit
+ (* Sets the attributes but does not check whether they match the DTD.
+ *)
+
+ method dtd : dtd
+ (* Get the DTD. *)
+
+ method create_element : dtd -> node_type -> (string * string) list -> 'ext node
+ (* Create an "empty copy" of this element:
+ * - new DTD
+ * - new node type
+ * - new attribute list
+ * - empty list of nodes
+ *)
+
+ method create_data : dtd -> string -> 'ext node
+ (* Create an "empty copy" of this data node, with a new DTD and a new
+ * data string.
+ *)
+
+ method local_validate : unit
+ (* Check that this element conforms to the DTD. *)
+
+ method keep_always_whitespace_mode : unit
+ (* Normally, add_node does not accept data nodes when the DTD does not
+ * allow data nodes or only whitespace ("ignorable whitespace").
+ * Once you have invoked this method, ignorable whitespace is forced
+ * to be included into the document.
+ *)
+
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ (* Write the contents of this node and the subtrees to the passed
+ * output stream; the character set ISO-8859-1 is used. The format
+ * is compact (the opposite of "pretty printing").
+ *)
+
+ (* ---------------------------------------- *)
+ (* internal methods (not meant to be called by user code): *)
+ method internal_adopt : 'ext node option -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : dtd -> string -> (string * string) list -> unit
+ end
+;;
+
+(* Node implementation for data nodes. The first argument is the extension
+ * object; the string argument is presumably the initial character data --
+ * TODO confirm against the implementation.
+ *)
+class [ 'ext ] data_impl : 'ext -> string -> [ 'ext ] node
+
+(* Node implementation for element nodes; the argument is the extension
+ * object.
+ *)
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+
+(* The document object. It gives access to the XML version, the standalone
+ * flag, the DTD, the root node, and the processing instructions of the
+ * document. The class argument is the object that collects warnings.
+ *)
+class [ 'ext ] document :
+ Markup_types.collect_warnings ->
+ object
+ (* Initialization: set the XML version, the standalone flag, and the
+ * root node.
+ *)
+ method init_xml_version : string -> unit
+ method init_xml_standalone : bool -> unit
+ method init_root : 'ext node -> unit
+
+ (* Accessors. *)
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ (* Processing instructions of the document: add one, get all with a
+ * given name, get the list of names.
+ *)
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write_compact_as_latin1 : Markup_types.output_stream -> unit
+ (* Write the document to the passed
+ * output stream; the character set ISO-8859-1 is used. The format
+ * is compact (the opposite of "pretty printing").
+ * If a DTD is present, the DTD is included into the internal subset.
+ *)
+
+ end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3 2000/07/16 16:35:06 gerd
+ * Update because PXP interface contains now the method 'write'.
+ *
+ * Revision 1.2 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.13 2000/05/27 19:15:08 gerd
+ * Removed the method init_xml_standalone.
+ *
+ * Revision 1.12 2000/05/01 20:42:34 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.11 2000/04/30 18:15:57 gerd
+ * Beautifications.
+ * New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.9 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.8 2000/01/27 21:19:07 gerd
+ * Added further methods.
+ *
+ * Revision 1.7 1999/11/09 22:20:14 gerd
+ * Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.5 1999/09/01 16:19:57 gerd
+ * The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4 1999/08/19 21:59:13 gerd
+ * Added method "reset_finder".
+ *
+ * Revision 1.3 1999/08/19 01:08:29 gerd
+ * Added method "find".
+ *
+ * Revision 1.2 1999/08/15 02:19:41 gerd
+ * Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+(* A PXP DTD object whose internal encoding is fixed to ISO-8859-1. The
+ * Markup-style warning collector is coerced to the PXP warner type.
+ *)
+class dtd (w : Markup_types.collect_warnings) =
+  Pxp_dtd.dtd (w :> Pxp_types.collect_warnings) `Enc_iso88591;;
+
+(* Element declaration: both arguments are passed on to
+ * Pxp_dtd.dtd_element unchanged.
+ *)
+class dtd_element =
+  fun owner elname -> Pxp_dtd.dtd_element owner elname;;
+
+(* Notation declaration with the encoding fixed to ISO-8859-1. *)
+class dtd_notation =
+  fun name id -> Pxp_dtd.dtd_notation name id `Enc_iso88591;;
+
+(* Processing instruction with the encoding fixed to ISO-8859-1. *)
+class proc_instruction =
+  fun target value -> Pxp_dtd.proc_instruction target value `Enc_iso88591;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/06/14 22:19:27 gerd
+ * Update because of additional 'encoding' methods.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_dtd.mli.
+ * It corresponds to revision 1.11 of markup_dtd.mli.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Markup_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+(* The DTD class of the old interface: it takes a Markup-style warning
+ * collector and has the class type of Pxp_dtd.dtd.
+ *)
+class dtd :
+ Markup_types.collect_warnings ->
+ Pxp_dtd.dtd
+ (* Incompatibilities with markup-0.2.10:
+ * the methods add_gen_entity and gen_entity (the old change log at
+ * the end of this file, revision 1.10, mentions an additional boolean
+ * argument for them).
+ *)
+
+(* An element declaration; the arguments are the DTD and a string
+ * (presumably the element name).
+ *)
+class dtd_element : dtd -> string -> Pxp_dtd.dtd_element
+ (* Incompatibilities with markup-0.2.10:
+ * the methods set_content_model and add_attribute (see revision 1.10
+ * in the old change log at the end of this file).
+ *)
+
+(* A notation declaration; the arguments are a string (presumably the
+ * notation name) and an external identifier.
+ *)
+class dtd_notation : string -> Markup_types.ext_id -> Pxp_dtd.dtd_notation
+
+(* A processing instruction; the two string arguments are presumably the
+ * target and the value of the instruction.
+ *)
+class proc_instruction : string -> string -> Pxp_dtd.proc_instruction
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.11 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10 2000/05/27 19:20:38 gerd
+ * Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ * The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ * Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ * Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8 2000/05/06 23:10:26 gerd
+ * allow_arbitrary for elements, too.
+ *
+ * Revision 1.7 2000/05/01 20:42:52 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.6 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.5 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.4 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.3 1999/09/01 16:21:56 gerd
+ * "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2 1999/08/15 02:20:23 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types;;
+
+(* The resolver type of the old interface: open an external identifier as
+ * lexbuf, close the source, switch the encoding, and clone the resolver.
+ *)
+class type resolver =
+ object
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ method clone : resolver
+ end
+;;
+
+(* General note: in the classes below, close_in is simulated by calling
+ * close_all of the PXP resolver. Of course, this is wrong, but it should
+ * not matter.
+ *)
+
+
+(* Old-style resolver reading from the channel ch. All work is delegated
+ * to a Pxp_reader.resolve_read_this_channel object created with
+ * auto_close:false; the warner is coerced to the PXP warner type and the
+ * representation encoding is set to ISO-8859-1.
+ *)
+class resolve_read_channel ch the_warner =
+  object (self)
+    val pxp_resolver =
+      new Pxp_reader.resolve_read_this_channel ~auto_close:false ch
+    val warner = the_warner
+
+    initializer
+      begin
+        let pxp_warner =
+          (warner : Markup_types.collect_warnings
+                  :> Pxp_types.collect_warnings)
+        in
+        pxp_resolver # init_warner pxp_warner;
+        pxp_resolver # init_rep_encoding `Enc_iso88591
+      end
+
+    (* Delegated to the PXP resolver. *)
+    method open_in xid = pxp_resolver # open_in xid
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+
+    (* sic! close_all, not close_in, is called on the PXP resolver. *)
+    method close_in = pxp_resolver # close_all
+
+    (* The clone wraps a clone of the PXP resolver. *)
+    method clone =
+      let copy = pxp_resolver # clone in
+      ( {< pxp_resolver = copy >} : #resolver :> resolver )
+
+  end
+;;
+
+
+(* Old-style resolver reading from the string str. All work is delegated
+ * to a Pxp_reader.resolve_read_this_string object; warnings go to a
+ * Pxp_types.drop_warnings object, and the representation encoding is set
+ * to ISO-8859-1.
+ *)
+class resolve_read_string str =
+  object (self)
+    val pxp_resolver = new Pxp_reader.resolve_read_this_string str
+    val warner = new Pxp_types.drop_warnings
+
+    initializer
+      begin
+        pxp_resolver # init_warner warner;
+        pxp_resolver # init_rep_encoding `Enc_iso88591
+      end
+
+    (* Delegated to the PXP resolver. *)
+    method open_in xid = pxp_resolver # open_in xid
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+
+    (* sic! close_all, not close_in, is called on the PXP resolver. *)
+    method close_in = pxp_resolver # close_all
+
+    (* The clone wraps a clone of the PXP resolver. *)
+    method clone =
+      let copy = pxp_resolver # clone in
+      ( {< pxp_resolver = copy >} : #resolver :> resolver )
+  end
+;;
+
+
+(* Old-style resolver for files. All work is delegated to a
+ * Pxp_reader.resolve_as_file object created with system encoding
+ * ISO-8859-1; the warner is coerced to the PXP warner type and the
+ * representation encoding is set to ISO-8859-1.
+ *)
+class resolve_as_file the_warner =
+  object (self)
+    val pxp_resolver =
+      new Pxp_reader.resolve_as_file ~system_encoding:`Enc_iso88591 ()
+    val warner = the_warner
+
+    initializer
+      begin
+        let pxp_warner =
+          (warner : Markup_types.collect_warnings
+                  :> Pxp_types.collect_warnings)
+        in
+        pxp_resolver # init_warner pxp_warner;
+        pxp_resolver # init_rep_encoding `Enc_iso88591
+      end
+
+    (* Delegated to the PXP resolver. *)
+    method open_in xid = pxp_resolver # open_in xid
+
+    method change_encoding enc = pxp_resolver # change_encoding enc
+
+    (* sic! close_all, not close_in, is called on the PXP resolver. *)
+    method close_in = pxp_resolver # close_all
+
+    (* The clone wraps a clone of the PXP resolver. *)
+    method clone =
+      let copy = pxp_resolver # clone in
+      ( {< pxp_resolver = copy >} : #resolver :> resolver )
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_reader.mli.
+ * It corresponds to revision 1.3 of markup_reader.mli.
+ *)
+
+open Markup_types;;
+
+
+(* The class type resolver is the official type of all "resolvers".
+ * Resolvers get file names (or better, external identifiers) and
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator.
+ *)
+
+class type resolver =
+ object
+ (* A resolver can open a character source, and returns this source as
+ * a Lexing.lexbuf.
+ * The resolver should recode the source into ISO-8859-1. By default,
+ * a resolver should assume UTF-8 or UTF-16 encoding. Before
+ * 'change_encoding' is invoked, the resolver should only return
+ * lexbufs with one character. After 'change_encoding' has been invoked,
+ * there is no character limit anymore.
+ * 'change_encoding' can only be invoked once. This method is usually
+ * called after the <? ... ?> prolog of the entity has been read.
+ * If the prolog does not specify an encoding, it is up to the resolver
+ * to find out whether UTF-8 or UTF-16 is used. It is recommended to
+ * invoke 'change_encoding' with an empty string to indicate this
+ * situation.
+ *)
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+
+
+ (* Every resolver can be cloned. The clone does not inherit the connection
+ * with the external object, i.e. it is closed.
+ *)
+ method clone : resolver
+
+ end
+;;
+
+
+(* The following class is the current main implementation of resolvers.
+ * It fetches strings from an arbitrary source (by calling init_in, and
+ * then repeatedly next_string), recodes them to ISO-8859-1, and creates
+ * lexbufs for them.
+ * It is not complete, as the source is missing.
+ *
+ * Note that 'resolve_general' may change in future revisions; it is ugly.
+ *)
+
+(* -- This API simulation does not provide 'resolve_general' any longer
+
+class virtual resolve_general :
+ collect_warnings ->
+ object
+ val mutable encoding : string
+ val mutable encoding_requested : bool
+ val warner : collect_warnings
+
+ method clone : resolver
+
+ method private warn : int -> unit
+ method private autodetect : string -> unit
+
+ method private virtual next_string : string -> int -> int -> int
+ method private virtual init_in : ext_id -> unit
+ method virtual close_in : unit
+
+ method open_in : ext_id -> Lexing.lexbuf
+
+ method change_encoding : string -> unit
+ end
+*)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_channel :
+ in_channel -> collect_warnings -> resolver;;
+
+ (* Reads from the passed channel (it may even be a pipe). Note that this
+ * resolver cannot handle file inclusions, as it is pre-bound to a
+ * specific channel and is not able to interpret file names.
+ * That means, if there is an entity reference (something like &name; or
+ * %name;) to parse, and the definition points to another file, the
+ * resolver will fail.
+ *)
+
+
+class resolve_read_string :
+ string -> resolver;;
+
+ (* Reads from the passed string. Like 'resolve_read_channel', this
+ * resolver cannot handle file inclusions.
+ *)
+
+
+class resolve_as_file :
+ collect_warnings -> resolver;;
+
+ (* Reads from the local file system. Every file name is interpreted as
+ * a file name of the local file system, and the file referred to is read.
+ * This resolver can handle file inclusions as long as they do not
+ * exceed the scope of the local file system (i.e. no URLs).
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+
+(* External identifier; re-export of Pxp_types.ext_id, so values of both
+ * types are interchangeable.
+ *)
+type ext_id = Pxp_types.ext_id =
+ System of string
+ | Public of (string * string)
+ | Anonymous
+(* Identifier of a DTD; re-export of Pxp_types.dtd_id. *)
+type dtd_id = Pxp_types.dtd_id=
+ External of ext_id
+ | Derived of ext_id
+ | Internal
+(* Content models of element declarations; re-exports of the mutually
+ * recursive types content_model_type, mixed_spec and regexp_spec of
+ * Pxp_types.
+ *)
+type content_model_type = Pxp_types.content_model_type =
+ Unspecified
+ | Empty
+ | Any
+ | Mixed of mixed_spec list
+ | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec =
+ MPCDATA
+ | MChild of string
+and regexp_spec = Pxp_types.regexp_spec =
+ Optional of regexp_spec
+ | Repeated of regexp_spec
+ | Repeated1 of regexp_spec
+ | Alt of regexp_spec list
+ | Seq of regexp_spec list
+ | Child of string
+(* Declared type of an attribute; re-export of Pxp_types.att_type. *)
+type att_type = Pxp_types.att_type =
+ A_cdata
+ | A_id
+ | A_idref
+ | A_idrefs
+ | A_entity
+ | A_entities
+ | A_nmtoken
+ | A_nmtokens
+ | A_notation of string list
+ | A_enum of string list
+(* Default declaration of an attribute; re-export of
+ * Pxp_types.att_default.
+ *)
+type att_default = Pxp_types.att_default =
+ D_required
+ | D_implied
+ | D_default of string
+ | D_fixed of string
+(* Value of an attribute: a single string, a list of strings, or the
+ * implied value; re-export of Pxp_types.att_value.
+ *)
+type att_value = Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+
+(* Collects warnings in a buffer. Every warning is stored as one line of
+ * the form "WARNING: <text>"; print_warnings returns everything collected
+ * so far, reset empties the buffer.
+ *)
+class collect_warnings =
+  object
+    val mutable buf = Buffer.create 100
+
+    method warn s =
+      Buffer.add_string buf "WARNING: ";
+      Buffer.add_string buf s;
+      Buffer.add_string buf "\n"
+
+    method reset =
+      Buffer.clear buf
+
+    method print_warnings =
+      Buffer.contents buf
+  end
+
+(* Exceptions of the old interface. Illegal_character is declared locally
+ * (the change log below notes that PXP no longer has it); the others are
+ * aliases of exceptions defined in Pxp_types and Netconversion.
+ *)
+exception Illegal_character of int
+exception Validation_error = Pxp_types.Validation_error
+exception WF_error = Pxp_types.WF_error
+exception Character_not_supported = Pxp_types.Character_not_supported
+exception Bad_character_stream = Netconversion.Malformed_code
+exception At = Pxp_types.At
+exception Undeclared = Pxp_types.Undeclared
+
+(* Converts an exception into a string by delegating to Pxp_types. *)
+let string_of_exn e = Pxp_types.string_of_exn e
+
+(* Destination of output: a buffer, an output channel, or a function
+ * taking a string and two integers; re-export of
+ * Pxp_types.output_stream.
+ *)
+type output_stream = Pxp_types.output_stream =
+ Out_buffer of Buffer.t
+ | Out_channel of out_channel
+ | Out_function of (string -> int -> int -> unit)
+
+(* write os s pos len: delegates to Pxp_types.write. *)
+let write os s pos len = Pxp_types.write os s pos len
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.4 2000/07/16 18:30:15 gerd
+ * Updated because PXP does no longer have the exception
+ * Illegal_character.
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_types.mli.
+ * It corresponds to revision 1.7 of markup_types.mli.
+ *)
+
+
+(* External identifier. System carries one string, Public a pair of
+ * strings, Anonymous nothing.
+ * NOTE(review): presumably System holds the SYSTEM literal and Public the
+ * pair (PUBLIC literal, SYSTEM literal) -- confirm against Pxp_types.
+ *)
+type ext_id = Pxp_types.ext_id =
+ System of string
+ | Public of (string * string)
+ | Anonymous
+(* Identifier of a DTD: External and Derived carry an external
+ * identifier, Internal carries nothing.
+ *)
+type dtd_id = Pxp_types.dtd_id =
+ External of ext_id
+ | Derived of ext_id
+ | Internal
+(* Content model of an element declaration. Mixed carries a list of
+ * mixed_spec items (MPCDATA or a named child); Regexp carries a regular
+ * expression over child elements, built from Optional, Repeated,
+ * Repeated1, Alt, Seq and Child.
+ * NOTE(review): Repeated/Repeated1 presumably correspond to the XML
+ * operators * and + -- confirm against Pxp_types.
+ *)
+type content_model_type = Pxp_types.content_model_type =
+ Unspecified
+ | Empty
+ | Any
+ | Mixed of mixed_spec list
+ | Regexp of regexp_spec
+and mixed_spec = Pxp_types.mixed_spec =
+ MPCDATA
+ | MChild of string
+and regexp_spec = Pxp_types.regexp_spec =
+ Optional of regexp_spec
+ | Repeated of regexp_spec
+ | Repeated1 of regexp_spec
+ | Alt of regexp_spec list
+ | Seq of regexp_spec list
+ | Child of string
+(* Declared type of an attribute. The constructor names follow the XML
+ * attribute types; A_notation and A_enum carry a string list (presumably
+ * the permitted names/tokens).
+ *)
+type att_type = Pxp_types.att_type =
+ A_cdata
+ | A_id
+ | A_idref
+ | A_idrefs
+ | A_entity
+ | A_entities
+ | A_nmtoken
+ | A_nmtokens
+ | A_notation of string list
+ | A_enum of string list
+(* Default declaration of an attribute: required, implied, a default
+ * value, or a fixed value.
+ *)
+type att_default = Pxp_types.att_default =
+ D_required
+ | D_implied
+ | D_default of string
+ | D_fixed of string
+(* Value of an attribute: a single string, a list of strings, or the
+ * implied value.
+ *)
+type att_value = Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+
+(* An object that collects warnings. *)
+class collect_warnings :
+ object
+ method warn : string -> unit
+ (* Add a warning message to the collection. *)
+ method print_warnings : string
+ (* Return the warnings collected so far as one string. *)
+ method reset : unit
+ (* Discard all collected warnings. *)
+ end
+;;
+
+
+(* The exceptions of the Markup interface. At carries a string and another
+ * exception (presumably a position description and the original error --
+ * TODO confirm against Pxp_types).
+ *)
+exception Illegal_character of int
+exception Validation_error of string
+exception WF_error of string
+exception Character_not_supported
+exception Bad_character_stream
+exception At of (string * exn)
+exception Undeclared
+
+val string_of_exn : exn -> string
+ (* Converts a Markup exception into a human-readable string. *)
+
+
+(* Destination for written output: a buffer, an output channel, or a
+ * function that is called with a string and two integers (presumably
+ * position and length, as for 'write' below).
+ *)
+type output_stream = Pxp_types.output_stream =
+ Out_buffer of Buffer.t
+ | Out_channel of out_channel
+ | Out_function of (string -> int -> int -> unit)
+
+val write : output_stream -> string -> int -> int -> unit
+ (* write os s pos len: Writes the substring of s starting at pos with
+ * length len to the buffer/channel/function os.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:25 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:40 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:15:17 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+(* Parser configuration of the old interface; pxp_config below translates
+ * it into a Pxp_yacc configuration.
+ *)
+type config =
+ { warner : collect_warnings;
+ (* object collecting warnings *)
+ errors_with_line_numbers : bool;
+ processing_instructions_inline : bool;
+ (* mapped to Pxp_yacc.enable_pinstr_nodes *)
+ virtual_root : bool;
+ (* mapped to Pxp_yacc.enable_super_root_node *)
+ debugging_mode : bool;
+ }
+
+
+(* Input sources of the old interface; pxp_source below translates them
+ * into Pxp_yacc sources.
+ *)
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+ | Channel of in_channel
+ | File of string
+ | Latin1 of string
+ | ExtID of (ext_id * Markup_reader.resolver)
+
+(* Specification of the exemplar nodes: a table from node types to
+ * exemplars, plus the exemplar passed to PXP as default element exemplar
+ * (see pxp_dom below).
+ *)
+type 'ext domspec =
+ { map : (node_type, 'ext node) Hashtbl.t;
+ default_element : 'ext node;
+ }
+
+
+(* The default extension: it only remembers the node it has been attached
+ * to. Asking for the node before set_node has been called is a
+ * programming error (assertion failure).
+ *)
+class default_ext =
+  object(self)
+    val mutable node = (None : ('a extension node as 'a) option)
+
+    method set_node n =
+      node <- Some n
+
+    method node =
+      match node with
+        Some attached -> attached
+      | None -> assert false
+
+    method clone = {< >}
+  end
+;;
+
+
+(* A single default_ext object; default_dom below passes it to both of its
+ * exemplars.
+ *)
+let default_extension = new default_ext;;
+
+(* The default configuration: a fresh warning collector, error messages
+ * with line numbers, and all other options turned off.
+ *)
+let default_config =
+  let fresh_warner = new collect_warnings in
+  { warner = fresh_warner;
+    errors_with_line_numbers = true;
+    processing_instructions_inline = false;
+    virtual_root = false;
+    debugging_mode = false;
+  }
+
+
+(* The default exemplar specification: the table contains only a data_impl
+ * exemplar for T_data, and an element_impl exemplar is the default
+ * element. Both exemplars carry default_extension.
+ *)
+let default_dom =
+  let exemplars = Hashtbl.create 2 in
+  let data_exemplar = new data_impl default_extension "" in
+  Hashtbl.add exemplars T_data data_exemplar;
+  let element_exemplar = new element_impl default_extension in
+  { map = exemplars;
+    default_element = element_exemplar
+  }
+;;
+
+
+(* Translates an old-style configuration into a Pxp_yacc configuration.
+ * Starting from Pxp_yacc.default_config, the options of cfg are copied to
+ * their PXP counterparts; the encoding is fixed to ISO-8859-1 and the
+ * standalone declaration is not recognized.
+ *)
+let pxp_config cfg =
+  let pxp_warner = (cfg.warner :> Pxp_types.collect_warnings) in
+  let with_lines = cfg.errors_with_line_numbers in
+  let pinstr_nodes = cfg.processing_instructions_inline in
+  let super_root = cfg.virtual_root in
+  let debug = cfg.debugging_mode in
+  { Pxp_yacc.default_config with
+      Pxp_yacc.warner = pxp_warner;
+      Pxp_yacc.errors_with_line_numbers = with_lines;
+      Pxp_yacc.enable_pinstr_nodes = pinstr_nodes;
+      Pxp_yacc.enable_super_root_node = super_root;
+      Pxp_yacc.encoding = `Enc_iso88591;
+      Pxp_yacc.recognize_standalone_declaration = false;
+      Pxp_yacc.debugging_mode = debug;
+  }
+;;
+
+
+(* Adapter in the opposite direction of Markup_reader: it makes an
+ * old-style resolver r usable where a Pxp_reader.resolver is expected.
+ * Only ISO-8859-1 is accepted as representation encoding, and the warner
+ * passed by PXP is ignored.
+ *)
+class pxp_resolver r =
+  object (self)
+    val markup_resolver = r
+
+    (* The only supported representation encoding is ISO-8859-1. *)
+    method init_rep_encoding enc =
+      assert (enc = `Enc_iso88591 )
+
+    method rep_encoding = `Enc_iso88591
+
+    (* The warner is not used. *)
+    method init_warner _ = ()
+
+    (* Delegated to the old-style resolver. *)
+    method open_in xid = markup_resolver # open_in xid
+
+    method change_encoding enc = markup_resolver # change_encoding enc
+
+    (* Both close methods map to close_in of the old-style resolver. *)
+    method close_in = markup_resolver # close_in
+
+    method close_all = markup_resolver # close_in
+
+    (* The clone wraps a clone of the old-style resolver. *)
+    method clone =
+      let copy = markup_resolver # clone in
+      ( {< markup_resolver = copy >}
+        : #Pxp_reader.resolver :> Pxp_reader.resolver )
+  end
+;;
+
+
+(* Translates an old-style source into a Pxp_yacc source. Channels, files
+ * and strings use the Pxp_yacc.from_* functions with ISO-8859-1; Entity
+ * and ExtID sources get their resolver wrapped into a pxp_resolver.
+ *)
+let pxp_source src =
+  match src with
+    Channel ch ->
+      Pxp_yacc.from_channel ~system_encoding:`Enc_iso88591 ch
+  | File f ->
+      Pxp_yacc.from_file ~system_encoding:`Enc_iso88591 f
+  | Latin1 s ->
+      Pxp_yacc.from_string ~fixenc:`Enc_iso88591 s
+  | Entity (mkent, res) ->
+      Pxp_yacc.Entity (mkent, new pxp_resolver res)
+  | ExtID (id, res) ->
+      Pxp_yacc.ExtID (id, new pxp_resolver res)
+;;
+
+
+(* Translates an old-style exemplar specification into a PXP spec.
+ * The exemplar for T_data must exist (assertion failure otherwise). The
+ * element entries named "-vr" and "-pi" are not put into the element
+ * mapping; if present, they become the super root exemplar and the
+ * default processing instruction exemplar, respectively.
+ *)
+let pxp_dom dom =
+  let dex =
+    try Hashtbl.find dom.map T_data
+    with Not_found -> assert false
+  in
+  let eex = dom.default_element in
+  let m = Hashtbl.create 100 in
+  (* Copy the ordinary element exemplars into the PXP mapping. *)
+  let copy_exemplar nt ex =
+    match nt with
+      T_element "-vr" | T_element "-pi" | T_data ->
+        ()
+    | T_element name ->
+        Hashtbl.add m name (ex # pxp_node)
+  in
+  Hashtbl.iter copy_exemplar dom.map;
+  (* Look up the exemplar of one of the reserved element names. *)
+  let special_exemplar name =
+    try
+      Some ((Hashtbl.find dom.map (T_element name)) # pxp_node)
+    with
+      Not_found -> None
+  in
+  let srex = special_exemplar "-vr" in
+  let piex = special_exemplar "-pi" in
+  Pxp_document.make_spec_from_mapping
+    ?super_root_exemplar:srex
+    ?default_pinstr_exemplar:piex
+    ~data_exemplar:(dex # pxp_node)
+    ~default_element_exemplar:(eex # pxp_node)
+    ~element_mapping:m
+    ()
+;;
+
+
+(* Builds an old-style document object from the PXP document doc: the XML
+ * version and the standalone flag are copied, the index is stored in the
+ * extension of the PXP root, the markup node of that extension becomes
+ * the root, and all processing instructions of doc are added.
+ *)
+let markup_document w index doc =
+  let mdoc = new document w in
+  mdoc # init_xml_version (doc # xml_version);
+  mdoc # init_xml_standalone (doc # xml_standalone);
+  let root_ext = doc # root # extension in
+  root_ext # set_index index;
+  mdoc # init_root (root_ext # markup_node);
+  (* Copy the processing instructions name by name. *)
+  let copy_pinstrs piname =
+    let instructions = doc # pinstr piname in
+    List.iter (fun pi -> mdoc # add_pinstr pi) instructions
+  in
+  List.iter copy_pinstrs (doc # pinstr_names);
+  mdoc
+;;
+
+
+
+(* Parses a DTD by calling Pxp_yacc.parse_dtd_entity with the translated
+ * source and configuration.
+ *)
+let parse_dtd_entity cfg src =
+  let pxp_src = pxp_source src in
+  let pxp_cfg = pxp_config cfg in
+  Pxp_yacc.parse_dtd_entity pxp_cfg pxp_src
+;;
+
+
+(* Parses a document with Pxp_yacc.parse_document_entity, passing a fresh
+ * hash index as ID index, and converts the result into an old-style
+ * document.
+ *)
+let parse_document_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_doc =
+    Pxp_yacc.parse_document_entity
+      ~id_index:index
+      (pxp_config cfg)
+      (pxp_source src)
+      (pxp_dom dom)
+  in
+  markup_document cfg.warner index pxp_doc
+;;
+
+
+(* Parses a content entity with Pxp_yacc.parse_content_entity, passing a
+ * fresh hash index as ID index. The index is stored in the extension of
+ * the resulting PXP node, and the markup node of that extension is
+ * returned.
+ *)
+let parse_content_entity cfg src dtd dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_root =
+    Pxp_yacc.parse_content_entity
+      ~id_index:index
+      (pxp_config cfg)
+      (pxp_source src)
+      dtd
+      (pxp_dom dom)
+  in
+  let ext = pxp_root # extension in
+  ext # set_index index;
+  ext # markup_node
+;;
+
+
+(* Parses a document with Pxp_yacc.parse_wfdocument_entity and converts
+ * the result into an old-style document.
+ * Restriction: the index is created here but not passed to the parser, so
+ * it is not filled!
+ *)
+let parse_wf_entity cfg src dom =
+  let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in
+  let pxp_doc =
+    Pxp_yacc.parse_wfdocument_entity
+      (pxp_config cfg)
+      (pxp_source src)
+      (pxp_dom dom)
+  in
+  markup_document cfg.warner index pxp_doc
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:16 gerd
+ * Updates in the emulation because of PXP changes.
+ *
+ * Revision 1.3 2000/07/14 21:35:35 gerd
+ * Updated because of the simplification of Pxp_types.collect_warnings.
+ *
+ * Revision 1.2 2000/07/08 17:40:50 gerd
+ * Updated the simulation.
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * Markup! The validating XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *
+ * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli.
+ * It corresponds to revision 1.4 of markup_yacc.mli.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Markup_types
+open Markup_dtd
+open Markup_document
+
+(* The configuration of the parser. *)
+type config =
+ { warner : collect_warnings;
+ (* An object that collects warnings. *)
+
+ errors_with_line_numbers : bool;
+ (* Whether error messages contain line numbers or not. The parser
+ * is 10 to 20 per cent faster if line numbers are turned off;
+ * you get only character positions in this case.
+ *)
+
+ processing_instructions_inline : bool;
+ (* true: turns on a special mode for processing instructions. Normally,
+ * you cannot determine the exact location of a PI; you only know
+ * in which element the PI occurs. The "inline" mode makes it possible
+ * to find out the exact location: Every PI is artificially wrapped
+ * by a special element with name "-pi". For example, if the XML text
+ * is <a><?x?><?y?></a>, the parser normally produces only an element
+ * object for "a", and puts the PIs "x" and "y" into it (without
+ * order). In inline mode, the object "a" will contain two objects
+ * with name "-pi", and the first object will contain "x", and the
+ * second "y".
+ * Notes:
+ * (1) The name "-pi" is reserved. You cannot use it for your own
+ * tags because tag names must not begin with '-'.
+ * (2) You need not add a declaration for "-pi" to the DTD. These
+ * elements are handled separately.
+ * (3) Of course, the "-pi" objects are created from exemplars of
+ * your DOM map.
+ *)
+
+ virtual_root : bool;
+ (* true: the topmost element of the XML tree is not the root element,
+ * but the so-called virtual root. The root element is a son of the
+ * virtual root. The virtual root is an ordinary element with name
+ * "-vr".
+ * The following behaviour changes, too:
+ * - PIs occurring outside the root element and outside the DTD are
+ * added to the virtual root instead of the document object
+ * - If processing_instructions_inline is also turned on, these PIs
+ * are added inline to the virtual root
+ * Notes:
+ * (1) The name "-vr" is reserved. You cannot use it for your own
+ * tags because tag names must not begin with '-'.
+ * (2) You need not add a declaration for "-vr" to the DTD. These
+ * elements are handled separately.
+ * (3) Of course, the "-vr" objects are created from exemplars of
+ * your DOM map.
+ *)
+
+ (* The following options are not implemented, or only for internal
+ * use.
+ *)
+
+ debugging_mode : bool;
+ }
+
+
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver)
+ | Channel of in_channel
+ | File of string
+ | Latin1 of string
+ | ExtID of (ext_id * Markup_reader.resolver)
+
+(* Note on sources:
+ *
+ * The sources do not all have the same capabilities. Here are the differences:
+ *
+ * - File: A File source reads from a file by name. This has the advantage
+ * that references to external entities can be resolved. - The problem
+ * with SYSTEM references is that they usually contain relative file
+ * names; more exactly, a file name relative to the document containing it.
+ * It is only possible to convert such names to absolute file names if the
+ * name of the document containing such references is known; and File
+ * denotes this name.
+ *
+ * - Channel, Latin1: These sources read from documents given as channels or
+ * (Latin 1-encoded) strings. There is no file name, and because of this
+ * the documents must not contain references to external files (even
+ * if the file names are given as absolute names).
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ * entity to read from is passed to the resolver r as-is.
+ * The intention of this option is to allow customized
+ * resolvers to interpret external identifiers without any restriction.
+ * For example, you can assign the PUBLIC identifiers a meaning (they
+ * currently do not have any), or you can extend the "namespace" of
+ * identifiers.
+ * ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement every behaviour by using a customized
+ * entity class. Once the DTD object d is known that will be used during
+ * parsing, the entity e = m d is determined and used together with the
+ * resolver r.
+ * This is only for hackers.
+ *)
+
+
+type 'ext domspec =
+ { map : (node_type, 'ext node) Hashtbl.t;
+ default_element : 'ext node;
+ }
+ (* Specifies which node to use as exemplar for which node type. See the
+ * manual for explanations.
+ *)
+
+val default_config : config
+ (* - The resolver is able to read from files by name
+ * - Warnings are thrown away
+ * - Error messages will contain line numbers
+ * - The internal encoding is ISO-8859-1
+ * - standalone declaration is checked
+ * NOTE(review): the config type of this interface has no resolver,
+ * encoding or standalone field; the corresponding items above presumably
+ * describe fixed behaviour of the emulation -- confirm.
+ *)
+
+val default_extension : ('a node extension) as 'a
+ (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_dom : ('a node extension as 'a) domspec
+ (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+ (* Parse an entity containing a DTD, and return this DTD. *)
+
+val parse_document_entity : config -> source -> 'ext domspec -> 'ext document
+ (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and validate the contents of the document against the DTD contained
+ * and/or referenced in the document.
+ *)
+
+val parse_content_entity : config ->
+ source ->
+ dtd ->
+ 'ext domspec ->
+ 'ext node
+ (* Parse a file representing a well-formed fragment of a document. The
+ * fragment must be a single element (i.e. something like <a>...</a>;
+ * not a sequence like <a>...</a><b>...</b>). The element is validated
+ * against the passed DTD, but it is not checked whether the element is
+ * the root element specified in the DTD.
+ * Note that you can create DTDs that specify not to validate at all
+ * (invoke method allow_arbitrary on the DTD).
+ *)
+
+val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document
+ (* Parse a closed document (see parse_document_entity), but do not
+ * validate it. Only checks on well-formedness are performed.
+ *)
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/29 23:43:51 gerd
+ * Initial compatibility revision.
+ *
+ * ======================================================================
+ * OLD LOGS:
+ *
+ * Revision 1.4 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3 2000/05/27 19:24:01 gerd
+ * New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ * Revision 1.9 2000/04/30 18:23:38 gerd
+ * New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8 2000/03/13 23:46:46 gerd
+ * Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ * Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/02/22 01:52:45 gerd
+ * Added documentation.
+ *
+ * Revision 1.5 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.4 1999/09/01 23:09:10 gerd
+ * New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3 1999/09/01 16:26:36 gerd
+ * Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2 1999/08/14 22:20:27 gerd
+ * The "config" slot has now a component "warner"which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+******************************************************************************
+ABOUT-FINDLIB - Package manager for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+The findlib library provides a scheme to manage reusable software components
+(packages), and includes tools that support this scheme. Packages are
+collections of OCaml modules for which metainformation can be stored. The
+packages are kept in the filesystem hierarchy, but with strict directory
+structure. The library contains functions to look up the directory that stores
+a package, to query metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows the user
+to enter queries on the command-line. In order to simplify compilation and
+linkage, there are new frontends of the various OCaml compilers that can
+directly deal with packages.
+
+Together with the packages metainformation is stored. This includes a version
+string, the archives the package consists of, and additional linker options.
+Packages can also be dependent on other packages. There is a query which finds
+out all predecessors of a list of packages and sorts them topologically. The
+new compiler frontends do this implicitly.
+
+Metainformation can be conditional, i.e. depend on a set of predicates. This is
+mainly used to be able to react to certain properties of the environment, such
+as if the bytecode or the native compiler is invoked, if the application is
+multi-threaded, and a few more. If the new compiler frontends are used, most
+predicates are found out automatically.
+
+There is special support for scripts. A new directive, "#require", loads
+packages into scripts. Of course, this works only with newly created toploops
+which include the findlib library.
+
+==============================================================================
+Where to get findlib
+==============================================================================
+
+The manual of findlib is available online [1]. You can download findlib here
+[2].
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY f "<em>findlib</em>">
+<!ENTITY F "<em>Findlib</em>">
+
+]>
+
+<readme title="ABOUT-FINDLIB - Package manager for O'Caml">
+ <sect1>
+ <title>Abstract</title>
+<p>
+The &f; library provides a scheme to manage reusable software
+components (packages), and includes tools that support this
+scheme. Packages are collections of OCaml modules for which
+metainformation can be stored. The packages are kept in the filesystem
+hierarchy, but with strict directory structure. The library contains
+functions to look up the directory that stores a package, to query
+metainformation about a package, and to retrieve dependency
+information about multiple packages. There is also a tool that allows
+the user to enter queries on the command-line. In order to simplify
+compilation and linkage, there are new frontends of the various OCaml
+compilers that can directly deal with packages.
+</p>
+
+<p>
+Together with the packages metainformation is stored. This includes a
+version string, the archives the package consists of, and additional
+linker options. Packages can also be dependent on other
+packages. There is a query which finds out all predecessors of a list
+of packages and sorts them topologically. The new compiler frontends
+do this implicitly.
+</p>
+
+<p>
+Metainformation can be conditional, i.e. depend on a set of
+predicates. This is mainly used to be able to react to certain
+properties of the environment, such as if the bytecode or the native
+compiler is invoked, if the application is multi-threaded, and a few
+more. If the new compiler frontends are used, most predicates are
+found out automatically.
+</p>
+
+<p>
+There is special support for scripts. A new directive, "#require",
+loads packages into scripts. Of course, this works only with newly
+created toploops which include the &f; library.
+</p>
+
+ </sect1>
+
+ <sect1><title>Where to get findlib</title>
+ <p>
+The manual of &f; is available <a href="&url.findlib-project;">online</a>.
+You can download &f; <a href="&url.findlib-download;">here</a>.
+</p>
+ </sect1>
+</readme>
--- /dev/null
+******************************************************************************
+Extensions of the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+This parser has some options extending the XML specification. Here, the options
+are explained.
+
+==============================================================================
+Optional declarations instead of mandatory declarations
+==============================================================================
+
+The XML spec demands that elements, notations, and attributes must be declared.
+However, there are sometimes situations where a different rule would be better:
+If there is a declaration, the actual instance of the element type, notation
+reference or attribute must match the pattern of the declaration; but if the
+declaration is missing, a reasonable default declaration should be assumed.
+
+I have an example that seems to be typical: The inclusion of HTML into a meta
+language. Imagine you have defined some type of "generator" or other tool
+working with HTML fragments, and your document contains two types of elements:
+The generating elements (with a name like "gen:xxx"), and the object elements
+which are HTML. As HTML is still evolving, you do not want to declare the HTML
+elements; the HTML fragments should be treated as well-formed XML fragments. In
+contrast to this, the elements of the generator should be declared and
+validated because you can more easily detect errors.
+
+The following two processing instructions can be included into the DTD:
+
+-
+ <?pxp:dtd optional-element-and-notation-declarations?>
+
+ References to unknown element types and notations no longer cause an error.
+ The element may contain everything, but it must be still well-formed. It may
+ have arbitrary attributes, and every attribute is treated as an #IMPLIED
+ CDATA attribute.
+
+-
+ <?pxp:dtd optional-attribute-declarations elements="x y ..."?>
+
+ References to unknown attributes inside one of the enumerated elements no
+ longer cause an error. Such an attribute is treated as an #IMPLIED CDATA
+ attribute.
+ If there are several "optional-attribute-declarations" PIs, they are all
+ interpreted (implicitly merged).
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="Extensions of the XML specification">
+
+ <sect1>
+ <title>This document</title>
+ <p>This parser has some options extending the XML specification. Here, the
+options are explained.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>Optional declarations instead of mandatory declarations</title>
+
+<p>The XML spec demands that elements, notations, and attributes must be
+declared. However, there are sometimes situations where a different rule would
+be better: <em>If</em> there is a declaration, the actual instance of the
+element type, notation reference or attribute must match the pattern of the
+declaration; but if the declaration is missing, a reasonable default declaration
+should be assumed.</p>
+
+<p>I have an example that seems to be typical: The inclusion of HTML into a
+meta language. Imagine you have defined some type of "generator" or other tool
+working with HTML fragments, and your document contains two types of elements:
+The generating elements (with a name like "gen:xxx"), and the object elements
+which are HTML. As HTML is still evolving, you do not want to declare the HTML
+elements; the HTML fragments should be treated as well-formed XML fragments. In
+contrast to this, the elements of the generator should be declared and
+validated because you can more easily detect errors.</p>
+
+<p>The following two processing instructions can be included into the DTD:</p>
+ <ul>
+ <li><p><code><![CDATA[<?pxp:dtd optional-element-and-notation-declarations?>]]></code>
+ References to unknown element types and notations no longer cause an
+ error. The element may contain everything, but it must be still
+ well-formed. It may have arbitrary attributes, and every attribute is
+ treated as an #IMPLIED CDATA attribute.</p>
+ </li>
+ <li><p><code><![CDATA[<?pxp:dtd optional-attribute-declarations elements="x y ..."?>]]></code>
+ References to unknown attributes inside one of the enumerated elements
+ no longer cause an error. Such an attribute is treated as an #IMPLIED
+ CDATA attribute.
+</p>
+
+<p>If there are several "optional-attribute-declarations" PIs, they are all
+interpreted (implicitly merged).</p>
+ </li>
+ </ul>
+ </sect1>
+</readme>
--- /dev/null
+******************************************************************************
+INSTALL - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+The "pxp" package
+==============================================================================
+
+------------------------------------------------------------------------------
+Prerequisites
+------------------------------------------------------------------------------
+
+PXP requires that the netstring package [1] is already installed. PXP works
+only with O'Caml 3.00 (the support for 2.04 has been dropped). The installation
+procedure defined in the Makefile requires findlib [2] to work [3].
+
+------------------------------------------------------------------------------
+Configuration
+------------------------------------------------------------------------------
+
+It is not necessary to configure PXP; but you can switch off the UTF-8 support
+by setting the variable
+
+UTF8_SUPPORT = no
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled. - By
+default, the UTF-8 support is enabled.
+
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II; if
+this is too long, you can set UTF8_SUPPORT to "no".
+
+------------------------------------------------------------------------------
+Compilation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make all
+ compiles with the bytecode compiler and creates the files pxp_types.cma,
+ pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, and pxp_utf8.cmo
+ (*). The (*) files are not built if the UTF-8 support is switched off.
+
+- make opt
+ compiles with the native compiler and creates the files pxp_types.cmxa,
+ pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, and
+ pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support is
+ switched off.
+
+------------------------------------------------------------------------------
+Installation
+------------------------------------------------------------------------------
+
+The Makefile defines the following goals:
+
+- make install
+ installs the bytecode archives, the interface definitions, and if present,
+ the native archives in the default location of findlib as package "pxp"
+
+- make uninstall
+ removes the package "pxp"
+
+- make markup-install
+ installs the Markup compatibility API as package "markup"
+
+- make markup-uninstall
+ removes the package "markup"
+
+------------------------------------------------------------------------------
+Usage with the help of "findlib"
+------------------------------------------------------------------------------
+
+You can refer to the parser as the findlib package "pxp":
+
+ocamlfind ocamlc -package pxp ...
+
+By default, the UTF-8 support modules will be linked in. If you do not need
+them, you may define the predicate "pxp_without_utf8", which causes that the
+UTF-8 relevant parts are not linked with your program; the difference in size
+is about 1 MB:
+
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+
+Note that you can also reduce the size of the resulting executable by
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the
+documentation of Netstring.
+
+------------------------------------------------------------------------------
+Linking with the archives directly
+------------------------------------------------------------------------------
+
+If you need UTF-8 support, you must link your program as follows:
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
+ pxp_engine.cma pxp_utf8.cmo ...
+
+If you do not need UTF-8, the following suffices:
+
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+
+
+
+==============================================================================
+The examples
+==============================================================================
+
+In the "examples" directory you find several applications of PXP. They require
+that PXP has been installed using findlib. See the Makefiles in the directories
+for descriptions of "make" goals.
+
+==============================================================================
+Trouble shooting
+==============================================================================
+
+------------------------------------------------------------------------------
+Solaris
+------------------------------------------------------------------------------
+
+The "make" utility of Solaris does not work properly enough; there is a bug in
+it that prevents the so-called suffix rules from being recognized. There are
+two solutions:
+
+- Install GNU make and use it instead of Solaris make. This is the recommended
+ way to solve the problem, as GNU make can process almost every Makefile from
+ open source projects, and you will never have problems with building
+ software again.
+
+- Add the following lines to Makefile.code:
+
+ %.cmx: %.ml
+ $(OCAMLOPT) -c $<
+
+ %.cmo: %.ml
+ $(OCAMLC) -c $<
+
+ %.cmi: %.mli
+ $(OCAMLC) -c $<
+
+ %.ml: %.mll
+ ocamllex $<
+
+
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[2] see http://www.ocaml-programming.de/packages/documentation/findlib/
+
+[3] Findlib is a package manager, see the file ABOUT-FINDLIB.
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!ENTITY m "<em>PXP</em>">
+
+]>
+
+<readme title="INSTALL - PXP, the XML parser for O'Caml">
+ <sect1><title>The "pxp" package</title>
+ <sect2><title>Prerequisites</title>
+ <p>
+&m; requires that the <a href="&url.netstring-project;">netstring package
+</a> is already installed. &m; works
+only with O'Caml 3.00 (the support for 2.04 has been dropped).
+The installation
+procedure defined in the Makefile requires <a
+href="&url.findlib-project;">findlib</a> to work<footnote><em>Findlib</em> is a
+package manager, see the file ABOUT-FINDLIB.</footnote>.
+</p>
+ </sect2>
+
+ <sect2><title>Configuration</title>
+ <p>
+It is not necessary to configure PXP; but you can switch off the UTF-8
+support by setting the variable
+
+<code>
+UTF8_SUPPORT = no
+</code>
+
+in Makefile.conf. In this case, the UTF-8 modules are not even compiled.
+- By default, the UTF-8 support is enabled.
+</p>
+
+ <p>
+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II;
+if this is too long, you can set UTF8_SUPPORT to "no".</p>
+ </sect2>
+
+ <sect2><title>Compilation</title>
+ <p>
+The Makefile defines the following goals:
+</p>
+ <ul>
+ <li>
+ <p>make all</p>
+ <p>compiles with the bytecode compiler and creates the files
+pxp_types.cma, pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma,
+and pxp_utf8.cmo (*). The (*) files are not built if the UTF-8 support
+is switched off.</p>
+ </li>
+ <li>
+ <p>make opt</p>
+ <p>compiles with the native compiler and creates the files
+pxp_types.cmxa, pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa,
+and pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support
+is switched off.</p>
+ </li>
+ </ul>
+ </sect2>
+
+ <sect2><title>Installation</title>
+ <p>
+The Makefile defines the following goals:</p>
+ <ul>
+ <li>
+ <p>make install</p>
+ <p>installs the bytecode archives, the interface definitions, and if
+present, the native archives in the default location of <em>findlib</em> as
+package "pxp"
+</p>
+ </li>
+ <li>
+ <p>make uninstall</p>
+ <p>removes the package "pxp"</p>
+ </li>
+ <li>
+ <p>make markup-install</p>
+ <p>installs the Markup compatibility API as package "markup"</p>
+ </li>
+ <li>
+ <p>make markup-uninstall</p>
+ <p>removes the package "markup"</p>
+ </li>
+ </ul>
+ </sect2>
+
+ <sect2>
+ <title>Usage with the help of "findlib"</title>
+ <p>You can refer to the parser as the findlib package "pxp":
+
+<code>
+ocamlfind ocamlc -package pxp ...
+</code>
+
+By default, the UTF-8 support modules will be linked in. If you do not need
+them, you may define the predicate "pxp_without_utf8", which causes that the
+UTF-8 relevant parts are not linked with your program; the difference in size
+is about 1 MB:
+
+<code>
+ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ...
+</code>
+
+Note that you can also reduce the size of the resulting executable by
+specifying Netstring-related predicates (e.g. netstring_only_iso); see the
+documentation of Netstring.
+</p>
+ </sect2>
+
+ <sect2>
+ <title>Linking with the archives directly</title>
+ <p>If you need UTF-8 support, you must link your program as follows:
+
+<code>
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma
+ pxp_engine.cma pxp_utf8.cmo ...
+</code>
+
+If you do not need UTF-8, the following suffices:
+
+<code>
+ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ...
+</code>
+
+</p>
+ </sect2>
+
+ </sect1>
+
+ <sect1><title>The examples</title>
+ <p>
+In the "examples" directory you find several applications of &m;. They require
+that &m; has been installed using <em>findlib</em>. See the Makefiles in the
+directories for descriptions of "make" goals.
+</p>
+ </sect1>
+
+ <sect1><title>Trouble shooting</title>
+ <sect2><title>Solaris</title>
+ <p>
+The "make" utility of Solaris does not work properly enough; there is a bug
+in it that prevents the so-called suffix rules from being recognized. There
+are two solutions:</p>
+ <ul>
+ <li><p>Install GNU make and use it instead of Solaris make. This is
+the recommended way to solve the problem, as GNU make can process almost
+every Makefile from open source projects, and you will never have problems
+with building software again.</p></li>
+ <li><p>Add the following lines to Makefile.code:
+ <code>
+%.cmx: %.ml
+ $(OCAMLOPT) -c $&lt;
+
+%.cmo: %.ml
+ $(OCAMLC) -c $&lt;
+
+%.cmi: %.mli
+ $(OCAMLC) -c $&lt;
+
+%.ml: %.mll
+ ocamllex $&lt;
+</code>
+</p></li>
+ </ul>
+ </sect2>
+ </sect1>
+</readme>
\ No newline at end of file
--- /dev/null
+.PHONY: all
+all: README INSTALL ABOUT-FINDLIB SPEC PRERELEASE EXTENSIONS
+
+README: README.xml common.xml config.xml
+ readme -text README.xml >README
+
+INSTALL: INSTALL.xml common.xml config.xml
+ readme -text INSTALL.xml >INSTALL
+
+ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml
+ readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB
+
+SPEC: SPEC.xml common.xml config.xml
+ readme -text SPEC.xml >SPEC
+
+EXTENSIONS: EXTENSIONS.xml common.xml config.xml
+ readme -text EXTENSIONS.xml >EXTENSIONS
+
+PRERELEASE: PRERELEASE.xml common.xml config.xml
+ readme -text PRERELEASE.xml >PRERELEASE
+
+config.xml:
+ touch config.xml
+
+common.xml:
+ ln -s dist-common.xml common.xml
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C manual CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ $(MAKE) -C manual distclean
+
+.PHONY: symlinks
+symlinks:
+ ln -s ../examples/readme/readme.dtd .
+
--- /dev/null
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Pre-release of PXP, the XML parser for O'Caml
+==============================================================================
+
+PXP is the new, completely revised and partly rewritten validating XML parser
+for O'Caml; the old name, "Markup", has been dropped. The current version of
+PXP is still a bit experimental because it is not fully tested; however, it is
+now stable enough to be used in experimental applications.
+
+PXP will retain most parts of Markup's API; the name PXP emphasizes the
+strengths of the API: it is the Polymorphic XML Parser. The document objects
+representing the parsed file have an interesting polymorphism which allows the
+user of the parser to control which kind of objects are actually created.
+The current API supports the element type as criterion for object/class
+selection; future APIs will extend this concept such that arbitrary criteria
+are possible (e.g. you may want to have different classes for different
+namespaces).
+
+The current development goals of PXP are:
+
+- Full XML-1.0 conformance: The current pre-release is now very close to
+ strict XML-1.0 conformance. The only bigger difference to the standard is
+ that PXP sometimes accepts DTDs as legal while the standard forbids them
+ (non-deterministic content models).
+ One of the more important improvements since 0.2.10 is the possibility to
+ represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
+ strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
+ preprocessor hiding the details of the UTF-8 encoding in the lexer
+ definitions.
+
+- Correctness of validation: The well-formedness and validity constraints
+ must be implemented as correctly as possible. The last stable release had
+ already a regression test covering many aspects of XML. The test suite will
+ be extended.
+
+- Parsing performance: It should be possible to process large amounts of data
+ in a reasonable period of time. The last stable release had many stages of
+ processing that wasted time.
+ The current pre-release is already 30 per cent faster than 0.2.10.
+
+- Simplicity of usage: Unlike parsers based on imperative languages and DOM,
+ the usage of PXP should be simple, even for complex tasks. The current
+ parser API has already many advantages over DOM; especially it is well
+ integrated into the functional and object-oriented language O'Caml. You do
+ not have to deal with artificial representations like "node lists" while the
+ programming environment already provides good support for list structures.
+ The fact that O'Caml allows a functional programming style is interesting
+ for programs transforming XML trees.
+
+==============================================================================
+Download the PXP pre-release
+==============================================================================
+
+The current pre-release is available under
+http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz [1]. There is
+currently no documentation for this version of the software; it is recommended
+to use the Markup manual [2] and compare it with the current module interfaces.
+
+Please note that this is work in progress; it may still contain bugs and
+irregularities.
+
+The parser works only with OCaml-3. The parser needs the netstring package [3],
+at least version 0.9.1.
+
+I am very interested in your opinion of PXP; please contact me [4].
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [5]; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for
+commercial purposes as long as the license conditions are respected, see the
+file LICENSE coming with the distribution. It allows almost everything.
+
+==============================================================================
+Where to find the stable release
+==============================================================================
+
+Here. [6]
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz
+
+[2] see http://www.ocaml-programming.de/packages/documentation/markup/manual
+
+[3] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[4] see mailto:gerd@gerd-stolpmann.de
+
+[5] see mailto:gerd@gerd-stolpmann.de
+
+[6] see http://www.ocaml-programming.de/packages/documentation/markup
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - PXP, the XML parser for O'Caml">
+ <sect1>
+ <title>Pre-release of PXP, the XML parser for O'Caml</title>
+
+ <p>PXP is the new, completely revised and partly rewritten
+validating XML parser
+for O'Caml; the old name, "Markup", has been dropped. The current version
+of PXP is still a bit experimental because it is not fully tested; however,
+it is now stable enough to be used in experimental applications.
+</p>
+
+ <p>PXP will retain most parts of Markup's API; the name PXP
+emphasizes the strengths of the API: it is the Polymorphic XML Parser.
+The document objects representing the parsed file have an interesting
+polymorphism which allows that the user of the parser can control
+which kind of objects are actually created. The current API supports
+the element type as criterion for object/class selection; future APIs will
+extend this concept such that arbitrary criteria are possible
+(e.g. you may want to have different classes for different namespaces).
+</p>
+
+ <p>The current development goals of PXP are:</p>
+
+ <ul>
+ <li><p><em>Full XML-1.0 conformance:</em> The current pre-release
+is now very close to strict XML-1.0 conformance. The only bigger
+difference to the standard is that PXP sometimes accepts DTDs as legal
+while the standard forbids them (non-deterministic content models).</p>
+
+<p>One of the more important improvements since 0.2.10 is the possibility to
+represent XML documents internally as UTF-8 strings, not only as ISO-8859-1
+strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer
+preprocessor hiding the details of the UTF-8 encoding in the lexer definitions.
+</p>
+ </li>
+
+ <li><p><em>Correctness of validation:</em> The well-formedness
+and validity constraints must be implemented as correctly as possible.
+The last stable release had already a regression test covering many
+aspects of XML. The test suite will be extended.</p>
+ </li>
+
+ <li><p><em>Parsing performance:</em> It should be possible to
+process large amounts of data in a reasonable period of time. The last
+stable release had many stages of processing that wasted time.</p>
+
+ <p>The current pre-release is already 30 per cent faster than
+0.2.10.</p>
+ </li>
+
+ <li><p><em>Simplicity of usage:</em> Unlike parsers based on
+imperative languages and DOM, the usage of PXP should be simple, even
+for complex tasks. The current parser API has already many advantages
+over DOM; especially it is well integrated into the functional and
+object-oriented language O'Caml. You do not have to deal with
+artificial representations like "node lists" while the programming
+environment already provides good support for list structures. The
+fact that O'Caml allows a functional programming style is interesting
+for programs transforming XML trees.</p>
+ </li>
+ </ul>
+ </sect1>
+
+ <sect1>
+ <title>Download the PXP pre-release</title>
+
+ <p>The current pre-release is available under
+<a href="&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz">
+&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz</a>. There is currently no
+documentation for this version of the software; it is recommended to use the <a
+href="&url.markup-manual;">Markup manual</a> and compare it with the current
+module interfaces.</p>
+
+ <p>Please note that this is work in progress; it may still contain bugs
+and irregularities.</p>
+
+ <p>The parser works only with OCaml-3. The parser needs the <a
+href="&url.netstring-project;">netstring package</a>, at least version 0.9.1.
+</p>
+
+ <p>I am very interested in your opinion of PXP; please <a
+href="mailto:&person.gps.mail;">contact me</a>.</p>
+ </sect1>
+
+ <sect1>
+ <title>Author, Credits, Copying</title>
+ <p>
+<em>PXP</em> has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>Where to find the stable release</title>
+ <p><a href="&url.markup-project;">Here.</a></p>
+ </sect1>
+
+</readme>
+
--- /dev/null
+******************************************************************************
+README - PXP, the XML parser for O'Caml
+******************************************************************************
+
+
+==============================================================================
+Abstract
+==============================================================================
+
+PXP is a validating parser for XML-1.0 which has been written entirely in
+Objective Caml.
+
+PXP is the new name of the parser formerly known as "Markup". PXP means
+"Polymorphic XML parser" and emphasizes its most useful property: that the API
+is polymorphic and can be configured such that different objects are used to
+store different types of elements.
+
+==============================================================================
+Download
+==============================================================================
+
+You can download PXP as gzip'ed tarball [1]. The parser needs the Netstring [2]
+package (0.9.3). Note that PXP requires O'Caml 3.00.
+
+==============================================================================
+User's Manual
+==============================================================================
+
+The manual is included in the distribution both as a PostScript document and
+as a bunch of HTML files. An online version can be found here [3].
+
+==============================================================================
+Author, Credits, Copying
+==============================================================================
+
+PXP has been written by Gerd Stolpmann [4]; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for
+commercial purposes as long as the license conditions are respected, see the
+file LICENSE coming with the distribution. It allows almost everything.
+
+Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug reports.
+
+==============================================================================
+Description
+==============================================================================
+
+PXP is a validating XML parser for O'Caml [5]. It strictly complies with the
+XML-1.0 [6] standard.
+
+The parser is simple to call, usually only one statement (function call) is
+sufficient to parse an XML document and to represent it as an object tree.
+
+Once the document is parsed, it can be accessed using a class interface. The
+interface allows arbitrary access including transformations. One of the
+features of the document representation is its polymorphic nature; it is simple
+to add custom methods to the document classes. Furthermore, the parser can be
+configured such that different XML elements are represented by objects created
+from different classes. This is a very powerful feature, because it simplifies
+the structure of programs processing XML documents.
+
+Note that the class interface does not comply with the DOM standard. It was not a
+development goal to realize a standard API (industrial developers can do this much
+better than I); however, the API is powerful enough to be considered as
+equivalent with DOM. More important, the interface is compatible with the XML
+information model required by many XML-related standards.
+
+------------------------------------------------------------------------------
+Detailed feature list
+------------------------------------------------------------------------------
+
+- The XML instance is validated against the DTD; any violation of a validation
+ constraint leads to the rejection of the instance. The validator has been
+ carefully implemented, and conforms strictly to the standard. If needed, it
+ is also possible to run the parser in a well-formedness mode.
+
+- If possible, the validator applies a deterministic finite automaton to
+ validate the content models. This ensures that validation can always be
+ performed in linear time. However, in the case that the content models are
+ not deterministic, the parser uses a backtracking algorithm which can be
+ much slower. - It is also possible to reject non-deterministic content
+ models.
+
+- In particular, the validator also checks the complicated rules whether
+ parentheses are properly nested with respect to entities, and whether the
+ standalone declaration is satisfied. On demand, it is checked whether the
+ IDREF attributes only refer to existing nodes.
+
+- Entity references are automatically resolved while the XML text is being
+ scanned. It is not possible to recognize in the object tree where a
+ referenced entity begins or ends; the object tree only represents the
+ logical structure.
+
+- External entities are loaded using a configurable resolver infrastructure.
+ It is possible to connect the parser with an arbitrary XML source.
+
+- The parser can read XML text encoded in a variety of character sets.
+ Independent of this, it is possible to choose the encoding of the internal
+ representation of the tree nodes; the parser automatically converts the
+ input text to this encoding. Currently, the parser supports UTF-8 and
+ ISO-8859-1 as internal encodings.
+
+- The interface of the parser has been designed such that it is best
+ integrated into the language O'Caml. The first goal was simplicity of usage
+ which is achieved by many convenience methods and functions, and by allowing
+ the user to select which parts of the XML text are actually represented in
+ the tree. For example, it is possible to store processing instructions as
+ tree nodes, but the parser can also be configured such that these
+ instructions are put into hashtables. The information model is compatible
+ with the requirements of XML-related standards such as XPath.
+
+- In particular, the node tree can optionally contain or leave out processing
+ instructions and comments. It is also possible to generate a "super root"
+ object which is the parent of the root element. The attributes of elements
+ are normally not stored as nodes, but it is possible to get them wrapped
+ into nodes.
+
+- There is also an interface for DTDs; you can parse and access sequences of
+ declarations. The declarations are fully represented as recursive O'Caml
+ values.
+
+------------------------------------------------------------------------------
+Code examples
+------------------------------------------------------------------------------
+
+This distribution contains several examples:
+
+- validate: simply parses a document and prints all error messages
+
+- readme: Defines a DTD for simple "README"-like documents, and offers
+ conversion to HTML and text files [7].
+
+- xmlforms: This is already a sophisticated application that uses XML as style
+ sheet language and data storage format. It shows how a Tk user interface can
+ be configured by an XML style, and how data records can be stored using XML.
+
+------------------------------------------------------------------------------
+Restrictions and missing features
+------------------------------------------------------------------------------
+
+The following restrictions apply that are not violations of the standard:
+
+- The attributes "xml:space", and "xml:lang" are not supported specially. (The
+ application can do this.)
+
+- The built-in support for SYSTEM and PUBLIC identifiers is limited to local
+ file access. There is no support for catalogs. The parser offers a hook to
+ add missing features.
+
+- It is currently not possible to check for interoperability with SGML.
+
+The following features are also missing:
+
+- There is no special support for namespaces. (Perhaps in the next release?)
+
+- There is no support for XPATH or XSLT.
+
+However, I hope that these features will be implemented soon, either by myself
+or by contributors (who are invited to do so).
+
+------------------------------------------------------------------------------
+Recent Changes
+------------------------------------------------------------------------------
+
+- Changed in 1.0:
+ Support for document order.
+
+- Changed in 0.99.8:
+ Several fixes of bugs reported by Haruo Hosoya and Alain Frisch.
+ The class type "node" has been extended: you can go directly to the next and
+ previous nodes in the list; you can refer to nodes by position.
+ There are now some iterators for nodes: find, find_all, find_element,
+ find_all_elements, map_tree, iter_tree.
+ Experimental support for viewing attributes as nodes; I hope that helps
+ Alain writing his XPath evaluator.
+ The user's manual has been revised and is almost up to date.
+
+- Changed in 0.99.7:
+ There are now additional node types T_super_root, T_pinstr and T_comment,
+ and the parser is able to create the corresponding nodes.
+ The functions for character set conversion have been moved to the Netstring
+ package; they are not specific for XML.
+
+- Changed in 0.99.6:
+ Implemented a check on deterministic content models. Added an alternate
+ validator based on a DFA. - This means that now all mandatory features for
+ an XML-1.0 parser are implemented! The parser is now substantially complete.
+
+- Changed in 0.99.5:
+ The handling of ID and IDREF attributes has changed. The index of nodes
+ containing an ID attribute is now separated from the document. Optionally
+ the parser now checks whether the IDREF attributes refer to existing
+ elements.
+ The element nodes can optionally store the location in the source XML code.
+ The method 'write' writes the XML tree in every supported encoding.
+ (Successor of 'write_compact_as_latin1'.)
+ Several smaller changes and fixes.
+
+- Changed in 0.99.4:
+ The module Pxp_reader has been modernized. The resolver classes are simpler
+ to use. There is now support for URLs.
+ The interface of Pxp_yacc has been improved: The type 'source' is now
+ simpler. The type 'domspec' has gone; the new 'spec' is opaque and performs
+ better. There are some new parsing modes.
+ Many smaller changes.
+
+- Changed in 0.99.3:
+ The markup_* modules have been renamed to pxp_*. There is a new
+ compatibility API that tries to be compatible with markup-0.2.10.
+ The type "encoding" is now a polymorphic variant.
+
+- Changed in 0.99.2:
+ Added checks for the constraints about the standalone declaration.
+ Added regression tests about attribute normalization, attribute checks,
+ standalone checks.
+ Fixed some minor errors of the attribute normalization function.
+ The bytecode/native archives are now separated in a general part, in an
+ ISO-8859-1-relevant part, and a UTF-8-relevant part. The parser can again be
+ compiled with ocamlopt.
+
+- Changed in 0.99.1:
+ In general, this release is an early pre-release of the next stable version
+ 1.00. I do not recommend to use it for serious work; it is still very
+ experimental!
+ The core of the parser has been rewritten using a self-written parser
+ generator.
+ The lexer has been restructured, and can now handle UTF-8 encoded files.
+ Numerous other changes.
+
+
+--------------------------
+
+[1] see http://www.ocaml-programming.de/packages/pxp-1.0.tar.gz
+
+[2] see http://www.ocaml-programming.de/packages/documentation/netstring
+
+[3] see http://www.ocaml-programming.de/packages/documentation/pxp/manual
+
+[4] see mailto:gerd@gerd-stolpmann.de
+
+[5] see http://caml.inria.fr/
+
+[6] see http://www.w3.org/TR/1998/REC-xml-19980210.html
+
+[7] This particular document is an example of this DTD!
+
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!--
+<!ENTITY url.ocaml "http://caml.inria.fr/">
+<!ENTITY url.xml-spec "http://www.w3.org/TR/1998/REC-xml-19980210.html">
+<!ENTITY url.jclark-xmltdata "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
+<!ENTITY url.gps-ocaml-download "http://people.darmstadt.netsurf.de/ocaml">
+<!ENTITY url.markup-download "&url.gps-ocaml-download;/markup-0.1.tar.gz">
+<!ENTITY person.gps '<a
+ href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>'>
+-->
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="README - PXP, the XML parser for O'Caml">
+ <sect1>
+ <title>Abstract</title>
+ <p>
+<em>PXP</em> is a validating parser for XML-1.0 which has been written
+entirely in Objective Caml.
+</p>
+
+ <p>PXP is the new name of the parser formerly known as "Markup".
+PXP means "Polymorphic XML parser" and emphasizes its most useful
+property: that the API is polymorphic and can be configured such that
+different objects are used to store different types of elements.</p>
+ </sect1>
+
+ <sect1>
+ <title>Download</title>
+ <p>
+You can download <em>PXP</em> as gzip'ed <a
+href="&url.pxp-download;">tarball</a>. The parser needs the <a
+href="&url.netstring-project;">Netstring</a> package (0.9.3). Note that PXP
+requires O'Caml 3.00.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>User's Manual</title>
+ <p>
+The manual is included in the distribution both as a PostScript document and
+as a bunch of HTML files. An online version can be found <a
+ href="&url.pxp-manual;">here</a>.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>Author, Credits, Copying</title>
+ <p>
+<em>PXP</em> has been written by &person.gps;; it contains contributions by
+Claudio Sacerdoti Coen. You may copy it as you like,
+you may use it even for commercial purposes as long as the license conditions
+are respected, see the file LICENSE coming with the distribution. It allows
+almost everything.
+</p>
+
+ <p>Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug
+reports.</p>
+ </sect1>
+
+ <sect1>
+ <title>Description</title>
+ <p>
+<em>PXP</em> is a validating XML parser for <a
+href="&url.ocaml;">O'Caml</a>. It strictly complies with the
+<a href="&url.xml-spec;">XML-1.0</a> standard.
+</p>
+
+ <p>The parser is simple to call, usually only one statement (function
+call) is sufficient to parse an XML document and to represent it as an object
+tree.</p>
+
+ <p>
+Once the document is parsed, it can be accessed using a class interface.
+The interface allows arbitrary access including transformations. One of
+the features of the document representation is its polymorphic nature;
+it is simple to add custom methods to the document classes. Furthermore,
+the parser can be configured such that different XML elements are represented
+by objects created from different classes. This is a very powerful feature,
+because it simplifies the structure of programs processing XML documents.
+</p>
+
+ <p>
+Note that the class interface does not comply with the DOM standard. It was not a
+development goal to realize a standard API (industrial developers can do this much
+better than I); however, the API is powerful enough to be considered as
+equivalent with DOM. More important, the interface is compatible with the
+XML information model required by many XML-related standards.
+</p>
+
+ <sect2>
+ <title>Detailed feature list</title>
+
+ <ul>
+ <li><p>The XML instance is validated against the DTD; any violation of
+a validation constraint leads to the rejection of the instance. The validator
+has been carefully implemented, and conforms strictly to the standard. If
+needed, it is also possible to run the parser in a well-formedness mode.</p>
+ </li>
+ <li><p>If possible, the validator applies a deterministic finite
+automaton to validate the content models. This ensures that validation can
+always be performed in linear time. However, in the case that the content
+models are not deterministic, the parser uses a backtracking algorithm which
+can be much slower. - It is also possible to reject non-deterministic content
+models.</p>
+ </li>
+ <li><p>In particular, the validator also checks the complicated rules
+whether parentheses are properly nested with respect to entities, and whether
+the standalone declaration is satisfied. On demand, it is checked whether the
+IDREF attributes only refer to existing nodes.</p>
+ </li>
+ <li><p>Entity references are automatically resolved while the XML text
+is being scanned. It is not possible to recognize in the object tree where a
+referenced entity begins or ends; the object tree only represents the logical structure.</p>
+ </li>
+ <li><p>External entities are loaded using a configurable resolver
+infrastructure. It is possible to connect the parser with an arbitrary XML source.</p>
+ </li>
+ <li><p>The parser can read XML text encoded in a variety of character
+sets. Independent of this, it is possible to choose the encoding of the
+internal representation of the tree nodes; the parser automatically converts
+the input text to this encoding. Currently, the parser supports UTF-8 and
+ISO-8859-1 as internal encodings.</p>
+ </li>
+ <li><p>The interface of the parser has been designed such that it is
+best integrated into the language O'Caml. The first goal was simplicity of
+usage which is achieved by many convenience methods and functions, and by
+allowing the user to select which parts of the XML text are actually
+represented in the tree. For example, it is possible to store processing
+instructions as tree nodes, but the parser can also be configured such that
+these instructions are put into hashtables. The information model is compatible
+with the requirements of XML-related standards such as XPath.</p>
+ </li>
+ <li><p>In particular, the node tree can optionally contain or leave out
+processing instructions and comments. It is also possible to generate a "super
+root" object which is the parent of the root element. The attributes of
+elements are normally not stored as nodes, but it is possible to get them
+wrapped into nodes.</p>
+ </li>
+ <li><p>There is also an interface for DTDs; you can parse and access
+sequences of declarations. The declarations are fully represented as recursive
+O'Caml values.
+</p>
+ </li>
+ </ul>
+ </sect2>
+
+
+ <sect2>
+ <title>Code examples</title>
+ <p>
+This distribution contains several examples:</p>
+ <ul>
+ <li><p>
+<em>validate:</em> simply parses a
+document and prints all error messages
+</p></li>
+
+ <li><p>
+<em>readme:</em> Defines a DTD for simple "README"-like documents, and offers
+conversion to HTML and text files<footnote>This particular document is an
+example of this DTD!</footnote>.
+</p></li>
+
+ <li><p>
+<em>xmlforms:</em> This is already a
+sophisticated application that uses XML as style sheet language and data
+storage format. It shows how a Tk user interface can be configured by an
+XML style, and how data records can be stored using XML.
+</p></li>
+ </ul>
+ </sect2>
+
+ <sect2>
+ <title>Restrictions and missing features</title>
+ <p>
+The following restrictions apply that are not violations of the standard:
+</p>
+ <ul>
+ <li><p>
+The attributes "xml:space", and "xml:lang" are not supported specially.
+ (The application can do this.)</p></li>
+
+ <li><p>
+The built-in support for SYSTEM and PUBLIC identifiers is limited to
+ local file access. There is no support for catalogs. The parser offers
+ a hook to add missing features.</p></li>
+
+ <li><p>
+It is currently not possible to check for interoperability with SGML.
+</p></li>
+ </ul>
+
+<p>The following features are also missing:</p>
+ <ul>
+ <li><p>There is no special support for namespaces. (Perhaps in the next release?)</p>
+ </li>
+ <li><p>There is no support for XPATH or XSLT.</p>
+ </li>
+ </ul>
+<p>However, I hope that these features will be implemented soon, either by
+myself or by contributors (who are invited to do so).</p>
+ </sect2>
+
+ <sect2>
+ <title>Recent Changes</title>
+ <ul>
+ <li>
+ <p>Changed in 1.0:</p>
+ <p>Support for document order.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.8:</p>
+ <p>Several fixes of bugs reported by Haruo Hosoya and Alain
+Frisch.</p>
+ <p>The class type "node" has been extended: you can go directly to
+the next and previous nodes in the list; you can refer to nodes by
+position.</p>
+ <p>There are now some iterators for nodes: find, find_all,
+find_element, find_all_elements, map_tree, iter_tree.</p>
+ <p>Experimental support for viewing attributes as nodes; I hope that
+helps Alain writing his XPath evaluator.</p>
+ <p>The user's manual has been revised and is almost up to date.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.7:</p>
+ <p>There are now additional node types T_super_root, T_pinstr and
+T_comment, and the parser is able to create the corresponding nodes.</p>
+ <p>The functions for character set conversion have been moved to
+the Netstring package; they are not specific for XML.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.6:</p>
+ <p>Implemented a check on deterministic content models. Added
+an alternate validator based on a DFA. - This means that now all mandatory
+features for an XML-1.0 parser are implemented! The parser is now substantially
+complete.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.5:</p>
+ <p>The handling of ID and IDREF attributes has changed. The
+index of nodes containing an ID attribute is now separated from the document.
+Optionally the parser now checks whether the IDREF attributes refer to
+existing elements.</p>
+ <p>The element nodes can optionally store the location in the
+source XML code.</p>
+ <p>The method 'write' writes the XML tree in every supported
+encoding. (Successor of 'write_compact_as_latin1'.)</p>
+ <p>Several smaller changes and fixes.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.4:</p>
+ <p>The module Pxp_reader has been modernized. The resolver classes
+are simpler to use. There is now support for URLs.</p>
+ <p>The interface of Pxp_yacc has been improved: The type 'source'
+is now simpler. The type 'domspec' has gone; the new 'spec' is opaque and
+performs better. There are some new parsing modes.</p>
+ <p>Many smaller changes.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.3:</p>
+ <p>The markup_* modules have been renamed to pxp_*. There is a new
+compatibility API that tries to be compatible with markup-0.2.10.</p>
+ <p>The type "encoding" is now a polymorphic variant.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.2:</p>
+ <p>Added checks for the constraints about the standalone
+declaration.</p>
+ <p>Added regression tests about attribute normalization,
+attribute checks, standalone checks.</p>
+ <p>Fixed some minor errors of the attribute normalization
+function.</p>
+ <p>The bytecode/native archives are now separated in
+a general part, in an ISO-8859-1-relevant part, and a UTF-8-relevant
+part. The parser can again be compiled with ocamlopt.</p>
+ </li>
+ <li>
+ <p>Changed in 0.99.1:</p>
+ <p>In general, this release is an early pre-release of the
+next stable version 1.00. I do not recommend to use it for serious
+work; it is still very experimental!</p>
+ <p>The core of the parser has been rewritten using a self-written
+parser generator.</p>
+ <p>The lexer has been restructured, and can now handle UTF-8
+encoded files.</p>
+ <p>Numerous other changes.</p>
+ </li>
+
+<!--
+ <li>
+ <p>Changed in 0.2.10:</p>
+ <p>Bugfix: in the "allow_undeclared_attributes" feature.</p>
+ <p>Bugfix: in the methods write_compact_as_latin1.</p>
+ <p>Improvement: The code produced by the codewriter module can be
+faster compiled and with less memory usage.</p>
+ </li>
+
+ <li>
+ <p>Changed in 0.2.9:</p>
+ <p>New: The module Markup_codewriter generates for a given XML
+tree O'Caml code that creates the same XML tree. This is useful for
+applications which use large, constant XML trees.</p>
+ <p>New: Documents and DTDs have a method write_compact_as_latin1
+that writes an XML tree to a buffer or to a channel. (But it is not a pretty
+printer...)</p>
+ <p>Enhancement: If a DTD contains the processing instruction
+<code>
+<?xml:allow_undeclared_attributes x?></code>
+where "x" is the name of an already declared element it is allowed that
+instances of this element type have attributes that have not been declared.
+</p>
+ <p>New function Markup_types.string_of_exn that converts an
+exception from Markup into a readable string.</p>
+ <p>Change: The module Markup_reader contains all resolvers.
+The resolver API is now stable.</p>
+ <p>New parser modes processing_instructions_inline and
+virtual_root that help locating processing instructions exactly (if needed).
+</p>
+ <p>Many bugs regarding CRLF handling have been fixed.</p>
+ <p>The distributed tarball contains now the regression test suite.
+</p>
+ <p>The manual has been extended (but it is still incomplete and
+still behind the code).</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.8:</p>
+ <p>A bit more documentation (Markup_yacc).</p>
+ <p>Bugfix: In previous versions, the second trial to refer to
+an entity caused a Bad_character_stream exception. The reason was improper
+re-initialization of the resolver object.</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.7:</p>
+ <p>Added some methods in Markup_document.</p>
+ <p>Bugfix: in method orphaned_clone</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.6:</p>
+ <p>Enhancement: The config parameter has a new component
+"errors_with_line_numbers". If "true", error exceptions come with line numbers
+(the default; and the only option in the previous versions); if "false"
+the line numbers are left out (only character positions). The parser is 10 to
+20 percent faster if the lines are not tracked.</p>
+ <p>Enhancement: If a DTD contains the processing instruction
+<code>
+<?xml:allow_undeclared_elements_and_notations?></code>
+it is allowed that
+elements and notations are undeclared. However, the elements for which
+declarations exist are still validated. The main effect is that the
+keyword ALL in element declarations means that also undeclared elements
+are permitted at this location.</p>
+ <p>Bugfix in method "set_nodes" of class Markup_document.node_impl.
+</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.5:</p>
+ <p>If the XML source is a string (i.e. Latin1 some_string is passed
+to the parser functions as source), resolving did not work properly in
+previous releases. This is now fixed.
+</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.4:</p>
+ <p>A problem with some kind of DTD that does not specify the name
+of the root element was fixed. As a result, the "xmlforms" application works
+again. Again thanks to Haruo.</p>
+ <p>Due to the XML specs it is forbidden that parameter entities are
+referenced within the internal subset if the referenced text is not a
+complete declaration itself. This is checked, but the check was too hard;
+even in external entities referenced from the internal subset this rule
+was enforced. This has been corrected; in external entities it is now possible
+to use parameter entities in an unrestricted way.
+</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.3:</p>
+ <p>A fix for a problem when installing Markup on Solaris.
+Haruo detected the problem.</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.2:</p>
+ <p>A single bugfix: The parser did not reject documents where the
+root element was not the element declared as root element. Again thanks
+to Claudio.</p>
+ </li>
+ <li>
+ <p>Changed in 0.2.1:</p>
+ <p>A single bugfix which reduces the number of warnings. Thanks
+to Claudio for detecting the bug.</p>
+ </li>
+ <li>
+ <p>Changed in 0.2:</p>
+ <p>
+Many more constraints are checked in the 0.2 release than in 0.1. In particular,
+it is now guaranteed that entities are properly nested; parsed entities now always
+match the corresponding production of the grammar.</p>
+ <p>
+Many weak checks have been turned into strong checks. For example, it is now
+detected if the "version", "encoding", and "standalone" attributes of an XML
+declaration are ordered in the right way.
+</p>
+ <p>
+The error messages have been improved.
+</p>
+ </li>
+-->
+ </ul>
+ </sect2>
+ </sect1>
+</readme>
+
--- /dev/null
+******************************************************************************
+Notes on the XML specification
+******************************************************************************
+
+
+==============================================================================
+This document
+==============================================================================
+
+There are some points in the XML specification which are ambiguous. The
+following notes discuss these points, and describe how this parser behaves.
+
+==============================================================================
+Conditional sections and the token ]]>
+==============================================================================
+
+It is unclear what happens if an ignored section contains the token ]]> at
+places where it is normally allowed, i.e. within string literals and comments,
+e.g.
+
+<![IGNORE[ <!-- ]]> --> ]]>
+
+On the one hand, the production rule of the XML grammar does not treat such
+tokens specially. Following the grammar, already the first ]]> ends the
+conditional section
+
+<![IGNORE[ <!-- ]]>
+
+and the other tokens are included into the DTD.
+
+On the other hand, we can read: "Like the internal and external DTD subsets, a
+conditional section may contain one or more complete declarations, comments,
+processing instructions, or nested conditional sections, intermingled with
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
+may contain ]]>, so this is contradictory to the grammar.
+
+The intention of conditional sections is to include or exclude the section
+depending on the current replacement text of a parameter entity. Almost always
+such sections are used as in
+
+<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE")
+<![ %want.a.feature.or.not; [ ... ]]>
+
+This means that if it is possible to include a section it must also be legal to
+ignore the same section. This is a strong indication that the token ]]> must
+not count as section terminator if it occurs in a string literal or comment.
+
+This parser implements the latter.
+
+==============================================================================
+Conditional sections and the inclusion of parameter entities
+==============================================================================
+
+It is unclear what happens if an ignored section contains a reference to a
+parameter entity. In most cases, this is not problematic because nesting of
+parameter entities must respect declaration braces. The replacement text of
+parameter entities must either contain a whole number of declarations or only
+inner material of one declaration. Almost always it does not matter whether
+these references are resolved or not (the section is ignored).
+
+But there is one case which is not explicitly specified: Is it allowed that the
+replacement text of an entity contains the end marker ]]> of an ignored
+conditional section? Example:
+
+<!ENTITY % end "]]>">
+<![ IGNORE [ %end;
+
+We do not find the statement in the XML spec that the ]]> must be contained in
+the same entity as the corresponding <![ (as for the tokens <! and > of
+declarations). So it is possible to conclude that ]]> may be in another entity.
+
+Of course, there are many arguments not to allow such constructs: The resulting
+code is incomprehensible, and parsing takes longer (especially if the entities
+are external). I think the best argument against this kind of XML is that the
+XML spec is not detailed enough, as it contains no rules where entity
+references should be recognized and where not. For example:
+
+<!ENTITY % y "]]>">
+<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>">
+<![ IGNORE [ %x; ]]>
+
+Which token ]]> counts? From a logical point of view, the ]]> in the third line
+ends the conditional section. As already pointed out, the XML spec permits the
+interpretation that ]]> is recognized even in string literals, and this may be
+also true if it is "imported" from a separate entity; and so the first ]]>
+denotes the end of the section.
+
+As a practical solution, this parser does not expand parameter entities in
+ignored sections. Furthermore, it is also not allowed that the ending ]]> of
+ignored or included sections is contained in a different entity than the
+starting <![ token.
+
+==============================================================================
+Standalone documents and attribute normalization
+==============================================================================
+
+If a document is declared as stand-alone, a restriction on the effect of
+attribute normalization takes effect for attributes declared in external
+entities. Normally, the parser knows the type of the attribute from the ATTLIST
+declaration, and it can normalize attribute values depending on their types.
+For example, an NMTOKEN attribute can be written with leading or trailing
+spaces, but the parser returns always the nmtoken without such added spaces; in
+contrast to this, a CDATA attribute is not normalized in this way. For
+stand-alone documents the type information is not available if the ATTLIST
+declaration is located in an external entity. Because of this, the XML spec
+demands that attribute values must be written in their normal form in this
+case, i.e. without additional spaces.
+
+This parser interprets this restriction as follows. Obviously, the substitution
+of character and entity references is not considered as a "change of the value"
+as a result of the normalization, because these operations will be performed
+identically if the ATTLIST declaration is not available. The same applies to
+the substitution of TABs, CRs, and LFs by space characters. Only the removal of
+spaces depending on the type of the attribute changes the value if the ATTLIST
+is not available.
+
+This means in detail: CDATA attributes never violate the stand-alone status.
+ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator attributes must not be
+written with leading and/or trailing spaces. IDREFS, ENTITIES, and NMTOKENS
+attributes must not be written with extra spaces at the beginning or at the end
+of the value, or between the tokens of the list.
+
+The whole check is dubious, because the attribute type expresses also a
+semantical constraint, not only a syntactical one. At least this parser
+distinguishes strictly between single-value and list types, and returns the
+attribute values differently; the first are represented as Value s (where s is
+a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
+internal representation of the value is dependent on the attribute type, too,
+such that even normalized values are processed differently depending on whether
+the attribute has list type or not. For this parser, it still makes a
+difference whether a value is normalized and processed as if it were CDATA, or
+whether the value is processed according to its declared type.
+
+The stand-alone check is included to be able to make a statement whether other,
+well-formedness parsers can process the document. Of course, these parsers
+always process attributes as CDATA, and the stand-alone check guarantees that
+these parsers will always see the normalized values.
+
+==============================================================================
+Standalone documents and the restrictions on entity
+references
+==============================================================================
+
+Stand-alone documents must not refer to entities which are declared in an
+external entity. This parser applies this rule only: to general and NDATA
+entities when they occur in the document body (i.e. not in the DTD); and to
+general and NDATA entities occurring in default attribute values declared in the
+internal subset of the DTD.
+
+Parameter entities are out of discussion for the stand-alone property. If there
+is a parameter entity reference in the internal subset which was declared in an
+external entity, it is not available in the same way as the external entity is
+not available that contains its declaration. Because of this "equivalence",
+parameter entity references are not checked on violations against the
+stand-alone declaration. It simply does not matter. - Illustration:
+
+Main document:
+
+<!ENTITY % ext SYSTEM "ext">
+%ext;
+%ent;
+
+"ext" contains:
+
+<!ENTITY % ent "<!ELEMENT el (other*)>">
+
+
+
+Here, the reference %ent; would be illegal if the standalone declaration is
+strictly interpreted. This parser handles the references %ent; and %ext;
+equivalently which means that %ent; is allowed, but the element type "el" is
+treated as externally declared.
+
+General entities can occur within the DTD, but they can only be contained in
+the default value of attributes, or in the definition of other general
+entities. The latter can be ignored, because the check will be repeated when
+the entities are expanded. Though, general entities occurring in default
+attribute values are actually checked at the moment when the default is used in
+an element instance.
+
+General entities occurring in the document body are always checked.
+
+NDATA entities can occur in ENTITY attribute values; either in the element
+instance or in the default declaration. Both cases are checked.
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd" [
+
+<!ENTITY % common SYSTEM "common.xml">
+%common;
+
+<!-- Special HTML config: -->
+<!ENTITY % readme:html:up '<a href="../..">up</a>'>
+
+<!ENTITY % config SYSTEM "config.xml">
+%config;
+
+]>
+
+<readme title="Notes on the XML specification">
+
+ <sect1>
+ <title>This document</title>
+ <p>There are some points in the XML specification which are ambiguous.
+The following notes discuss these points, and describe how this parser
+behaves.</p>
+ </sect1>
+
+ <sect1>
+ <title>Conditional sections and the token ]]&gt;</title>
+
+ <p>It is unclear what happens if an ignored section contains the
+token ]]&gt; at places where it is normally allowed, i.e. within string
+literals and comments, e.g.
+
+<code>
+&lt;![IGNORE[ &lt;!-- ]]&gt; --&gt; ]]&gt;
+</code>
+
+On the one hand, the production rule of the XML grammar does not treat such
+tokens specially. Following the grammar, already the first ]]&gt; ends
+the conditional section
+
+<code>
+&lt;![IGNORE[ &lt;!-- ]]&gt;
+</code>
+
+and the other tokens are included into the DTD.</p>
+
+<p>On the other hand, we can read: "Like the internal and external DTD subsets,
+a conditional section may contain one or more complete declarations, comments,
+processing instructions, or nested conditional sections, intermingled with
+white space" (XML 1.0 spec, section 3.4). Complete declarations and comments
+may contain ]]&gt;, so this is contradictory to the grammar.</p>
+
+<p>The intention of conditional sections is to include or exclude the section
+depending on the current replacement text of a parameter entity. Almost
+always such sections are used as in
+
+<code>
+&lt;!ENTITY % want.a.feature.or.not "INCLUDE"&gt; (or "IGNORE")
+&lt;![ %want.a.feature.or.not; [ ... ]]&gt;
+</code>
+
+This means that if it is possible to include a section it must also be
+legal to ignore the same section. This is a strong indication that
+the token ]]&gt; must not count as section terminator if it occurs
+in a string literal or comment.</p>
+
+<p>This parser implements the latter.</p>
+
+ </sect1>
+
+ <sect1>
+ <title>Conditional sections and the inclusion of parameter entities</title>
+
+ <p>It is unclear what happens if an ignored section contains a reference
+to a parameter entity. In most cases, this is not problematic because
+nesting of parameter entities must respect declaration braces. The
+replacement text of parameter entities must either contain a <em>whole</em>
+number of declarations or only inner material of one declaration. Almost always
+it does not matter whether these references are resolved or not
+(the section is ignored).</p>
+
+ <p>But there is one case which is not explicitly specified: Is it allowed
+that the replacement text of an entity contains the end marker ]]&gt;
+of an ignored conditional section? Example:
+
+<code>
+&lt;!ENTITY % end "]]&gt;"&gt;
+&lt;![ IGNORE [ %end;
+</code>
+
+We do not find the statement in the XML spec that the ]]&gt; must be contained
+in the same entity as the corresponding &lt;![ (as for the tokens &lt;! and
+&gt; of declarations). So it is possible to conclude that ]]&gt; may be in
+another entity.</p>
+
+ <p>Of course, there are many arguments not to allow such constructs: The
+resulting code is incomprehensible, and parsing takes longer (especially if the
+entities are external). I think the best argument against this kind of XML
+is that the XML spec is not detailed enough, as it contains no rules where
+entity references should be recognized and where not. For example:
+
+<code>
+&lt;!ENTITY % y "]]&gt;"&gt;
+&lt;!ENTITY % x "&lt;!ENTITY z '&lt;![CDATA[some text%y;'&gt;"&gt;
+&lt;![ IGNORE [ %x; ]]&gt;
+</code>
+
+Which token ]]&gt; counts? From a logical point of view, the ]]&gt; in the
+third line ends the conditional section. As already pointed out, the XML spec
+permits the interpretation that ]]&gt; is recognized even in string literals,
+and this may be also true if it is "imported" from a separate entity; and so
+the first ]]&gt; denotes the end of the section.</p>
+
+ <p>As a practical solution, this parser does not expand parameter entities
+in ignored sections. Furthermore, it is also not allowed that the ending ]]&gt;
+of ignored or included sections is contained in a different entity than the
+starting &lt;![ token.</p>
+ </sect1>
+
+
+ <sect1>
+ <title>Standalone documents and attribute normalization</title>
+
+ <p>
+If a document is declared as stand-alone, a restriction on the effect of
+attribute normalization takes effect for attributes declared in external
+entities. Normally, the parser knows the type of the attribute from
+the ATTLIST declaration, and it can normalize attribute values depending
+on their types. For example, an NMTOKEN attribute can be written with
+leading or trailing spaces, but the parser returns always the nmtoken
+without such added spaces; in contrast to this, a CDATA attribute is
+not normalized in this way. For stand-alone documents the type information is
+not available if the ATTLIST declaration is located in an external
+entity. Because of this, the XML spec demands that attribute values must
+be written in their normal form in this case, i.e. without additional
+spaces.
+</p>
+ <p>This parser interprets this restriction as follows. Obviously,
+the substitution of character and entity references is not considered
+as a "change of the value" as a result of the normalization, because
+these operations will be performed identically if the ATTLIST declaration
+is not available. The same applies to the substitution of TABs, CRs,
+and LFs by space characters. Only the removal of spaces depending on
+the type of the attribute changes the value if the ATTLIST is not
+available.
+</p>
+ <p>This means in detail: CDATA attributes never violate the
+stand-alone status. ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator
+attributes must not be written with leading and/or trailing spaces. IDREFS,
+ENTITIES, and NMTOKENS attributes must not be written with extra spaces at the
+beginning or at the end of the value, or between the tokens of the list.
+</p>
+ <p>The whole check is dubious, because the attribute type expresses also a
+semantical constraint, not only a syntactical one. At least this parser
+distinguishes strictly between single-value and list types, and returns the
+attribute values differently; the first are represented as Value s (where s is
+a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The
+internal representation of the value is dependent on the attribute type, too,
+such that even normalized values are processed differently depending on
+whether the attribute has list type or not. For this parser, it still makes a
+difference whether a value is normalized and processed as if it were CDATA, or
+whether the value is processed according to its declared type.
+</p>
+ <p>The stand-alone check is included to be able to make a statement
+whether other, well-formedness parsers can process the document. Of course,
+these parsers always process attributes as CDATA, and the stand-alone check
+guarantees that these parsers will always see the normalized values.
+</p>
+ </sect1>
+
+ <sect1>
+ <title>Standalone documents and the restrictions on entity
+references</title>
+ <p>
+Stand-alone documents must not refer to entities which are declared in an
+external entity. This parser applies this rule only: to general and NDATA
+entities when they occur in the document body (i.e. not in the DTD); and to
+general and NDATA entities occurring in default attribute values declared in the
+internal subset of the DTD.
+</p>
+ <p>
+Parameter entities are out of discussion for the stand-alone property. If there
+is a parameter entity reference in the internal subset which was declared in an
+external entity, it is not available in the same way as the external entity is
+not available that contains its declaration. Because of this "equivalence",
+parameter entity references are not checked on violations against the
+stand-alone declaration. It simply does not matter. - Illustration:
+</p>
+
+ <p>
+Main document:
+
+ <code><![CDATA[
+<!ENTITY % ext SYSTEM "ext">
+%ext;
+%ent;
+]]></code>
+
+"ext" contains:
+
+ <code><![CDATA[
+<!ENTITY % ent "<!ELEMENT el (other*)>">
+]]></code>
+</p>
+
+ <p>Here, the reference %ent; would be illegal if the standalone
+declaration is strictly interpreted. This parser handles the references
+%ent; and %ext; equivalently which means that %ent; is allowed, but the
+element type "el" is treated as externally declared.
+</p>
+
+ <p>
+General entities can occur within the DTD, but they can only be contained in
+the default value of attributes, or in the definition of other general
+entities. The latter can be ignored, because the check will be repeated when
+the entities are expanded. Though, general entities occurring in default
+attribute values are actually checked at the moment when the default is
+used in an element instance.
+</p>
+ <p>
+General entities occurring in the document body are always checked.</p>
+ <p>
+NDATA entities can occur in ENTITY attribute values; either in the element
+instance or in the default declaration. Both cases are checked.
+</p>
+ </sect1>
+
+</readme>
--- /dev/null
+------------------------------------------------ -*- indented-text -*-
+Some Notes About the Design:
+----------------------------------------------------------------------
+
+----------------------------------------------------------------------
+Compilation
+----------------------------------------------------------------------
+
+Compilation is non-trivial because:
+
+ - The lexer and parser generators ocamllex resp. ocamlyacc normally
+ create code such that the parser module precedes the lexer module.
+ THIS design requires that the lexer layer precedes the entity layer
+ which precedes the parser layer, because the parsing results modify
+ the behaviour of the lexer and entity layers. There is no way to get
+ around this because of the nature of XML.
+
+ So the dependency relation of the lexer and the parser is modified;
+ in particular the "token" type that is normally defined by the
+ generated parser is moved to a common predecessor of both lexer
+ and parser.
+
+ - Another modification of the standard way of handling parsers is that
+ the parser is turned into an object. This is necessary because the
+ whole parser is polymorphic, i.e. there is a type parameter (the
+ type of the node extension).
+
+......................................................................
+
+First some modules are generated as illustrated by the following
+diagram:
+
+
+ markup_yacc.mly
+ | |
+ \|/ \|/ [ocamlyacc, 1]
+ V V
+ markup_yacc.mli markup_yacc.ml
+ | --> renamed into markup_yacc.ml0
+ [awk, 2] \|/ |
+ V \|/ [sed, 3]
+ markup_yacc_token.mlf V
+ | | markup_yacc.ml
+ markup_lexer_types_ | |
+ shadow.mli | | | markup_lexer_types_
+ \|/ [sed, \|/ | shadow.ml
+ V 4] V | |
+ markup_lexer_types.mli | | [sed, 4]
+ \|/ \|/
+ V V
+ markup_lexer_types.ml
+
+
+ markup_yacc_shadow.mli
+ |
+ \|/ [replaces, 5]
+ V
+ markup_yacc.mli
+
+
+
+ markup_lexers.mll
+ |
+ \|/ [ocamllex, 6]
+ V
+ markup_lexers.ml
+
+
+Notes:
+
+ (1) ocamlyacc generates both a module and a module interface.
+ The module is postprocessed in step (3). The interface cannot
+ be used, but it contains the definition of the "token" type.
+ This definition is extracted in step (2). The interface is
+ completely replaced in step (5) by a different file.
+
+ (2) An "awk" script extracts the definition of the type "token".
+ "token" is created by ocamlyacc upon the %token directives
+ in markup_yacc.mly, and normally "token" is defined in
+ the module generated by ocamlyacc. This turned out not to be
+ useful as the module dependency must be that the lexer is
+ an antecedent of the parser and not vice versa (as usual),
+ so the "token" type is "moved" to the module Markup_lexer_types
+ which is an antecedent of both the lexer and the parser.
+
+ (3) A "sed" script turns the generated parser into an object.
+ This is rather simple; some "let" definitions must be rewritten
+ as "val" definitions, the other "let" definitions as
+ "method" definitions. The parser object is needed because
+ the whole parser has a polymorphic type parameter.
+
+ (4) The implementation and definition of Markup_lexer_types are
+ both generated by inserting the "token" type definition
+ (in markup_yacc_token.mlf) into two pattern files,
+ markup_lexer_types_shadow.ml resp. -.mli. The point of insertion
+ is marked by the string INCLUDE_HERE.
+
+ (5) The generated interface of the Markup_yacc module is replaced
+ by a hand-written file.
+
+ (6) ocamllex generates the lexer; this process is not patched in any
+ way.
+
+......................................................................
+
+After the additional modules have been generated, compilation proceeds
+in the usual manner.
+
+
+----------------------------------------------------------------------
+Hierarchy of parsing layers:
+----------------------------------------------------------------------
+
+From top to bottom:
+
+ - Parser: Markup_yacc
+ + gets input stream from the main entity object
+ + checks most of the grammar
+ + creates the DTD object as side-effect
+ + creates the element tree as side-effect
+ + creates further entity objects that are entered into the DTD
+ - Entity layer: Markup_entity
+ + gets input stream from the lexers, or another entity object
+ + handles entity references: if a reference is encountered the
+ input stream is redirected such that the tokens come from the
+ referenced entity object
+ + handles conditional sections
+ - Lexer layer: Markup_lexers
+ + gets input from lexbuffers created by resolvers
+ + different lexers for different lexical contexts
+ + a lexer returns pairs (token,lexid), where token is the scanned
+ token, and lexid is the name of the lexer that must be used for
+ the next token
+ - Resolver layer: Markup_entity
+ + a resolver creates the lexbuf from some character source
+ + a resolver recodes the input and handles the encoding scheme
+
+----------------------------------------------------------------------
+The YACC based parser
+----------------------------------------------------------------------
+
+ocamlyacc allows passing an arbitrary 'next_token' function to the
+parsing functions. We always use 'en # next_token()' where 'en' is the
+main entity object representing the main file to be parsed.
+
+The parser is not functional, but uses mainly side-effects to accumulate
+the structures that have been recognized. This is very important for the
+entity definitions, because once an entity definition has been found there
+may be a reference to it which is handled by the entity layer (which is
+below the yacc layer). This means that such a definition modifies the
+token source of the parser, and this can only be handled by side-effects
+(at least in a sensible manner; a purely functional parser would have to
+pass unresolved entity references to its caller, which would have to
+resolve the reference and to re-parse the whole document!).
+
+Note that also element definitions profit from the imperative style of
+the parser; an element instance can be validated directly once the end
+tag has been read in.
+
+----------------------------------------------------------------------
+The entity layer
+----------------------------------------------------------------------
+
+The parser gets the tokens from the main entity object. This object
+controls the underlying lexing mechanism (see below), and already
+interprets the following:
+
+- Conditional sections (if they are allowed in this entity):
+ The structures <![ INCLUDE [ ... ]]> and <![ IGNORE [ ... ]]> are
+ recognized and interpreted.
+
+ This would be hard to realize by the yacc parser, because:
+ - INCLUDE and IGNORE are not recognized as lexical keywords but as names.
+ This means that the parser cannot select different rules for them.
+ - The text after IGNORE requires a different lexical handling.
+
+- Entity references: &name; and %name;
+ The named entity is looked up and the input source is redirected to it, i.e.
+ if the main entity object gets the message 'next_token' this message is
+ forwarded to the referenced entity. (This entity may choose to forward the
+ message again to a third entity, and so on.)
+
+ There are some fine points:
+
+ - It is okay that redirection happens at token level, not at character level:
+ + General entities must always match the 'content' production, and because
+ of this they must always consist of a whole number of tokens.
+ + If parameter entities are resolved, the XML specification states that
+ a space character is inserted before and after the replacement text.
+ This also means that such entities always consist of a whole number
+ of tokens.
+
+ - There are some "nesting constraints":
+ + General entities must match the 'content' production. Because of this,
+ the special token Begin_entity is inserted before the first token of
+ the entity, and End_entity is inserted just before the Eof token. The
+ brace Begin_entity...End_entity is recognized by the yacc parser, but
+ only in the 'content' production.
+ + External parameter entities must match 'extSubsetDecl'. Again,
+ Begin_entity and End_entity tokens embrace the inner token stream.
+ The brace Begin_entity...End_entity is recognized by the yacc parser
+ at the appropriate position.
+ (As general and parameter entities are used in different contexts
+ (document vs. DTD), both kinds of entities can use the same brace
+ Begin_entity...End_entity.)
+ + TODO:
+ The constraints for internal parameter entities are not yet checked.
+
+ - Recursive references can be detected because entities must be opened
+ before the 'next_token' method can be invoked.
+
+----------------------------------------------------------------------
+The lexer layer
+----------------------------------------------------------------------
+
+There are five main lexers, and a number of auxiliary lexers. The five
+main lexers are:
+
+- Document (function scan_document):
+ Scans an XML document outside the DTD and outside the element instance.
+
+- Content (function scan_content):
+ Scans an element instance, but not within tags.
+
+- Within_tag (function scan_within_tag):
+ Scans within <...>, i.e. a tag denoting an element instance.
+
+- Document_type (function scan_document_type):
+ Scans after <!DOCTYPE until the corresponding >.
+
+- Declaration (function scan_declaration):
+ Scans sequences of declarations
+
+Why several lexers? Because there are different lexical rules in these
+five regions of an XML document.
+
+Every lexer not only produces tokens, but also the name of the next lexer
+to use. For example, if the Document lexer scans "<!DOCTYPE", it also
+outputs that the next token must be scanned by Document_type.
+
+It is interesting that this really works. The beginning of every lexical
+context can be recognized by the lexer of the previous context, and there
+is always a token that unambiguously indicates that the context ends.
+
+----------------------------------------------------------------------
+The DTD object
+----------------------------------------------------------------------
+
+There is usually one object that collects DTD declarations. All kinds of
+declarations are entered here:
+
+- element and attribute list declarations
+- entity declarations
+- notation declarations
+
+Some properties are validated directly after a declaration has been added
+to the DTD, but most validation is done by a 'validate' method.
+
+The result of 'validate' is stored such that another invocation is cheap.
+A DTD becomes again 'unchecked' if another declaration is added.
+
+TODO: We need a special DTD object that allows every content.
+
+The DTD object is known by more or less every other object, i.e. entities
+know the DTD, element declarations and instances know the DTD, and so on.
+
+TODO: We need a method that deletes all entity declarations once the DTD
+is complete (to free memory).
+
+----------------------------------------------------------------------
+Element and Document objects
+----------------------------------------------------------------------
+
+The 'element' objects form the tree of the element instances.
+
+The 'document' object is a derivate of 'element' where properties of the
+whole document can be stored.
+
+New element objects are NOT created by the "new class" mechanism, but
+instead by an exemplar/instance scheme: A new instance is the duplicate
+of an exemplar. This has the advantage that the user can provide own
+classes for the element instances. A hashtable contains the exemplars
+for every element type (tag name), and there is a default exemplar.
+The user can configure this hashtable such that for elements A objects
+of class element_a, for elements B objects of class element_b and so on
+are used.
+
+The object for the root element must already be created before parsing
+starts, and the parser returns the (filled) root object. Because of this,
+the user determines the *static* type of the object without the need
+of back coercion (which is not possible in Ocaml).
+
+----------------------------------------------------------------------
+Newline normalization
+----------------------------------------------------------------------
+
+The XML spec states that all of \n, \r, and \r\n must be recognized
+as newline characters/character sequences. Notes:
+- The replacement text of entities always contains the original text,
+ i.e. \r and \r\n are NOT converted to \n.
+ It is unclear if this is a violation of the standard or not.
+- Content of elements: Newline characters are converted to \n.
+- Attribute values: Newline characters are converted to spaces.
+- Processing instructions: Newline characters are not converted.
+ It is unclear if this is a violation of the standard or not.
+
+----------------------------------------------------------------------
+Empty entities
+----------------------------------------------------------------------
+
+Many entities are artificially surrounded by a Begin_entity/End_entity pair.
+This is sometimes not done if the entity is empty:
+
+- External parameter entities are parsed entities, i.e. they must match
+ the markupdecl* production. If they are not empty, the Begin_entity/End_entity
+ trick guarantees that they match markupdecl+, and that they are only
+ referred to at positions where markupdecl+ is allowed.
+ If they are empty, they are allowed everywhere just like internal
+ parameter entities. Because of this, the Begin_entity/End_entity pair
+ is dropped.
+
+- This does not apply to parameter entities (either external or internal)
+ which are referred to in the internal subset, nor applies to internal
+ parameter entities, nor applies to general entities:
+
+ + References in the internal subset are only allowed at positions where
+ markupdecl can occur, so Begin_entity/End_entity is added even if the
+ entity is empty.
+ + References to internal parameter entities are allowed anywhere, so
+ Begin_entity/End_entity is never added.
+ + References to general entities: An empty Begin_entity/End_entity pair
+ is recognized by the yacc parser, so special handling is not required.
+ Moreover, there is the situation that an empty entity is referred to
+ after the toplevel element:
+ <!DOCTYPE doc ...[
+ <!ENTITY empty "">
+ ]>
+ <doc></doc>&empty;
+ - This is illegal, and the presence of an empty Begin_entity/End_entity pair
+ helps to recognize this.
--- /dev/null
+<?xml encoding="ISO-8859-1"?>
+
+<!-- ************************************************************ -->
+<!-- EXTERNAL URLs -->
+<!-- ************************************************************ -->
+
+<!ENTITY url.ocaml
+ "http://caml.inria.fr/">
+
+<!ENTITY url.ocaml.list
+ "http://caml.inria.fr/caml-list-eng.html">
+
+<!ENTITY url.ocaml.download
+ "ftp://ftp.inria.fr/lang/caml-light/">
+
+<!ENTITY url.ocaml.camlp4
+ "http://caml.inria.fr/camlp4/">
+
+<!ENTITY url.ocaml.hump
+ "http://caml.inria.fr/hump.html">
+
+<!ENTITY url.ocaml.mottl
+ "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/intro.html">
+
+<!ENTITY url.ocaml.mottl.pcre
+ "http://miss.wu-wien.ac.at/~mottl/ocaml_sources/pcre_ocaml.tar.gz">
+
+<!ENTITY url.ocaml.lindig
+ "http://www.cs.tu-bs.de/softech/people/lindig/software/index.html">
+
+<!ENTITY url.ocaml.lindig.ocmarkup
+ "http://www.cs.tu-bs.de/softech/people/lindig/software/ocmarkup.html">
+
+<!ENTITY url.ocaml.lindig.tony
+ "http://www.cs.tu-bs.de/softech/people/lindig/software/tony.html">
+
+<!ENTITY url.ocaml.filliatre
+ "http://www.lri.fr/~filliatr/software.en.html">
+
+<!ENTITY url.ocaml.filliatre.cgi
+ "http://www.lri.fr/~filliatr/ftp/ocaml/cgi/">
+
+<!ENTITY url.xml-spec
+ "http://www.w3.org/TR/1998/REC-xml-19980210.html">
+
+<!ENTITY url.xml.oasis
+ "http://www.oasis-open.org/cover/">
+
+<!ENTITY url.xml.w3c
+ "http://www.w3c.org/XML/">
+
+<!ENTITY url.jclark-xmltdata
+ "ftp://ftp.jclark.com/pub/xml/xmltest.zip">
+
+<!ENTITY urlprefix.ietf.rfc
+ "http://www.ietf.org/rfc">
+ <!-- No trailing "/"! -->
+
+<!ENTITY url.apache
+ "http://www.apache.org/">
+
+
+<!-- ************************************************************ -->
+<!-- MY URLs -->
+<!-- ************************************************************ -->
+
+<!ENTITY url.linkdb
+ "http://www.npc.de/ocaml/linkdb">
+
+<!-- ************************************************************ -->
+<!-- HOMEPAGE URLs -->
+<!-- ************************************************************ -->
+
+<!-- GENERIC -->
+
+<!ENTITY url.gps-ocaml-download
+ "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml">
+
+<!ENTITY url.gps-ocaml-projects
+ "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/ocaml/projects">
+
+<!ENTITY url.gps-old-download
+ "http://people.darmstadt.netsurf.de/Gerd.Stolpmann/download">
+
+
+<!-- SPECIFIC -->
+
+<!ENTITY release.findlib
+ "SOME-VERSION">
+
+<!ENTITY url.findlib-download
+ "&url.gps-ocaml-download;/findlib-&release.findlib;.tar.gz">
+
+<!ENTITY url.findlib-project
+ "&url.gps-ocaml-projects;/findlib/">
+
+<!ENTITY url.findlib-manual
+ "&url.gps-ocaml-projects;/findlib/">
+
+
+
+<!ENTITY release.markup
+ "SOME-VERSION">
+
+<!ENTITY url.markup-download
+ "&url.gps-ocaml-download;/markup-&release.markup;.tar.gz">
+
+<!ENTITY url.markup-project
+ "&url.gps-ocaml-projects;/markup">
+
+<!ENTITY url.markup-manual
+ "&url.gps-ocaml-projects;/markup/manual">
+
+
+<!-- ************************************************************ -->
+<!-- MAIL URLs -->
+<!-- ************************************************************ -->
+
+<!ENTITY person.gps '<a href="mailto:&person.gps.mail;">Gerd Stolpmann</a>'>
+
+<!ENTITY person.gps.mail
+ "Gerd.Stolpmann@darmstadt.netsurf.de">
+
--- /dev/null
+# Builds the manual from the DocBook sources in src/: an HTML version in
+# html/ and a PostScript version in ps/, both produced with jade.
+
+# DocBook stylesheet directories passed to jade with -D (site-specific paths).
+DOCBOOK_HTML = /usr/share/sgml/docbkdsl/html
+DOCBOOK_PRINT = /usr/share/sgml/docbkdsl/print
+# Absolute path, because the jade recipes cd into html/ or ps/ before running.
+SRC = $(PWD)/src
+
+# These targets are commands, not files. "html" and "ps" are also the names
+# of the output directories, and a stray file named "clean" or "default"
+# would otherwise stop the corresponding rule from running.
+.PHONY: default html ps clean CLEAN distclean
+
+default: html ps
+
+html: html/book1.htm html/pic/done
+
+ps: ps/markup.ps ps/pic/done
+
+
+# Entity files generated from OCaml sources by src/getcode.ml. The jade
+# rules below list them as prerequisites (presumably the SGML sources
+# include them -- confirm against src/*.sgml).
+src/readme.ent: ../../examples/readme/to_html.ml
+	src/getcode.ml <../../examples/readme/to_html.ml >src/readme.ent
+
+src/yacc.mli.ent: ../../pxp_yacc.mli
+	src/getcode.ml <../../pxp_yacc.mli >src/yacc.mli.ent
+
+src/dtd.mli.ent: ../../pxp_dtd.mli
+	src/getcode.ml <../../pxp_dtd.mli >src/dtd.mli.ent
+
+# HTML version. The second recipe line is a single shell command that ends
+# in "true", so its exit status is always 0 and a failing jade run does not
+# stop make.
+html/book1.htm: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+	mkdir -p html
+	cp src/markup.css html; \
+	cd html; \
+	rm -f *.htm*; \
+	jade -t sgml -D$(DOCBOOK_HTML) -D$(SRC) -ihtml markup.sgml; \
+	true
+	touch html/TIMESTAMP
+
+# Converts every src/pic/*.fig to GIF. html/pic/done is a stamp file that
+# records that the conversion has run.
+html/pic/done: src/pic/*.fig
+	mkdir -p html/pic
+	l=`cd src/pic; echo *.fig`; \
+	for x in $$l; do fig2dev -L gif src/pic/$$x html/pic/`basename $$x .fig`.gif; done
+	touch html/pic/done
+
+#man: src/findlib_reference.xml
+#	mkdir -p man
+#	cd man; \
+#	rm -f *.[0-9]; \
+#	db2man <../src/findlib_reference.xml
+
+# TeX version of the manual. As in the HTML rule, the trailing "true" hides
+# jade's exit status from make.
+ps/markup.tex: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+	mkdir -p ps
+	cd ps; \
+	jade -t tex -D$(DOCBOOK_PRINT) -D$(SRC) markup.sgml; \
+	true
+
+# jadetex is run three times in a row (presumably so that cross references
+# and the table of contents settle -- confirm).
+ps/markup.dvi: ps/markup.tex ps/pic/done
+	cd ps; \
+	jadetex markup.tex; \
+	jadetex markup.tex; \
+	jadetex markup.tex
+
+ps/markup.ps: ps/markup.dvi
+	cd ps; \
+	dvips -f <markup.dvi >markup.ps
+
+# Converts every src/pic/*.fig to PostScript, scaled by 0.8. ps/pic/done is
+# the stamp file for this conversion.
+ps/pic/done: src/pic/*.fig
+	mkdir -p ps/pic
+	l=`cd src/pic; echo *.fig`; \
+	for x in $$l; do fig2dev -L ps -m 0.8 src/pic/$$x ps/pic/`basename $$x .fig`.ps; done
+	touch ps/pic/done
+
+.SUFFIXES: .xml .sgml
+
+# Suffix rule: runs sx on an .sgml file to produce the .xml file. The exit
+# status of sx is ignored because of the trailing "true".
+.sgml.xml:
+	sx -xndata $< >$@; true
+
+
+
+# Removes the output directories and every generated entity file
+# (all three src/*.ent files built by the rules above).
+clean:
+	rm -rf html man ps
+	rm -f src/readme.ent src/yacc.mli.ent src/dtd.mli.ent
+
+CLEAN: clean
+
+# Removes editor backup files and the TeX intermediates in ps/.
+distclean:
+	rm -f src/*~
+	rm -f *~
+	rm -f ps/*.aux ps/*.dvi ps/*.log ps/*.tex
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Configuring and calling the parser</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="Details of the mapping from XML text to the tree representation"
+HREF="x1496.html"><LINK
+REL="NEXT"
+TITLE="Resolvers and sources"
+HREF="x1629.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1496.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1629.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN1567"
+>Chapter 4. Configuring and calling the parser</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>4.1. <A
+HREF="c1567.html#AEN1569"
+>Overview</A
+></DT
+><DT
+>4.2. <A
+HREF="x1629.html"
+>Resolvers and sources</A
+></DT
+><DT
+>4.3. <A
+HREF="x1812.html"
+>The DTD classes</A
+></DT
+><DT
+>4.4. <A
+HREF="x1818.html"
+>Invoking the parser</A
+></DT
+><DT
+>4.5. <A
+HREF="x1965.html"
+>Updates</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1569"
+>4.1. Overview</A
+></H1
+><P
+>There are the following main functions invoking the parser (in Pxp_yacc):
+
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_document_entity:</I
+> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!DOCTYPE root ... [ ... ] &gt; &lt;root&gt; ... &lt;/root&gt;</PRE
+>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_wfdocument_entity:</I
+> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_dtd_entity:</I
+> You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_content_entity:</I
+> You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. Especially, the fragment
+must not have a <TT
+CLASS="LITERAL"
+> &lt;!DOCTYPE&gt;</TT
+> clause, and must directly
+begin with an element. The element is validated against the DTD. This mode is
+interesting if you want to check documents against a fixed, immutable DTD.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>parse_wfcontent_entity:</I
+> This function
+also parses a single element without DTD, but does not validate it.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>extract_dtd_from_document_entity:</I
+> This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.</P
+></LI
+></UL
+></P
+><P
+>In many cases, <TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> is the preferred mode
+to parse a document in a validating way, and
+<TT
+CLASS="LITERAL"
+>parse_wfdocument_entity</TT
+> is the mode of choice to parse a
+file while only checking for well-formedness.</P
+><P
+>There are a number of variations of these modes. One important application of a
+parser is to check documents of an untrusted source against a fixed DTD. One
+solution is to not allow the <TT
+CLASS="LITERAL"
+>&lt;!DOCTYPE&gt;</TT
+> clause in
+these documents, and treat the document like a fragment (using mode
+<I
+CLASS="EMPHASIS"
+>parse_content_entity</I
+>). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.</P
+><P
+>It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse the document to check fully, i.e. with DTD, and to compare
+this DTD with the prescribed one. In order to fully parse the document, mode
+<I
+CLASS="EMPHASIS"
+>parse_document_entity</I
+> is applied, and to get the DTD to
+compare with mode <I
+CLASS="EMPHASIS"
+>parse_dtd_entity</I
+> can be used.</P
+><P
+>There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Consider you have a file called
+<TT
+CLASS="LITERAL"
+>"main.xml"</TT
+> containing
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY % sub SYSTEM "sub/sub.xml"&gt;
+%sub;</PRE
+>
+
+and a file stored in the subdirectory <TT
+CLASS="LITERAL"
+>"sub"</TT
+> with name
+<TT
+CLASS="LITERAL"
+>"sub.xml"</TT
+> containing
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ENTITY % subsub SYSTEM "subsub/subsub.xml"&gt;
+%subsub;</PRE
+>
+
+and a file stored in the subdirectory <TT
+CLASS="LITERAL"
+>"subsub"</TT
+> of
+<TT
+CLASS="LITERAL"
+>"sub"</TT
+> with name <TT
+CLASS="LITERAL"
+>"subsub.xml"</TT
+> (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity <TT
+CLASS="LITERAL"
+>subsub</TT
+> is located in the directory
+<TT
+CLASS="LITERAL"
+>"sub/subsub"</TT
+>, i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.</P
+><P
+>There is not a fixed resolver already doing everything right - resolving entity
+names is a task that highly depends on the environment. The XML specification
+only demands that <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> entities are interpreted like URLs
+(which is not very precise, as there are lots of URL schemes in use), hoping
+that this helps overcoming the local peculiarities of the environment; the idea
+is that if you do not know your environment you can refer to other entities by
+denoting URLs for them. I think that this interpretation of
+<TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> names may have some applications in the internet, but
+it is not the first choice in general. Because of this, the resolver is a
+separate module of the parser that can be exchanged by another one if
+necessary; more precisely, the parser already defines several resolvers.</P
+><P
+>The following resolvers do already exist:
+
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>A resolver that always reads from a given O'Caml
+string. This resolver is not able to resolve further names if the string is
+not associated with any name, i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>A resolver for file names. The <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+>
+name is interpreted as file URL with the slash "/" as separator for
+directories. - This resolver is derived from the generic URL resolver.</P
+></LI
+></UL
+>
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes support
+that several independent resolvers are combined to one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.</P
+><P
+>Note that the existing resolvers only interpret <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+>
+names, not <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> names. If it helps you, it is possible to
+define resolvers for <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1496.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1629.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Details of the mapping from XML text to the tree representation</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Resolvers and sources</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>What is XML?</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="NEXT"
+TITLE="Highlights of XML"
+HREF="x107.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="p34.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x107.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN36"
+>Chapter 1. What is XML?</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>1.1. <A
+HREF="c36.html#AEN38"
+>Introduction</A
+></DT
+><DT
+>1.2. <A
+HREF="x107.html"
+>Highlights of XML</A
+></DT
+><DT
+>1.3. <A
+HREF="x468.html"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN38"
+>1.1. Introduction</A
+></H1
+><P
+>XML (short for <I
+CLASS="EMPHASIS"
+>Extensible Markup Language</I
+>)
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might want that paragraphs are delimited by
+<TT
+CLASS="LITERAL"
+>para</TT
+> tags, and another DTD expects <TT
+CLASS="LITERAL"
+>p</TT
+> tags
+for the same purpose. As a result, for every DTD a new processor is required.</P
+><P
+>Although XML can be used to express structured text documents it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN45"
+>1.1.1. The "hello world" example</A
+></H2
+><P
+>The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:</P
+><PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT document (section)+&gt;
+&lt;!ELEMENT section (paragraph)+&gt;
+&lt;!ELEMENT paragraph (#PCDATA)&gt;</PRE
+><P
+>The following document is an instance of this DTD:</P
+><PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="ISO-8859-1"?&gt;
+&lt;!DOCTYPE document SYSTEM "simple.dtd"&gt;
+&lt;document&gt;
+  &lt;section&gt;
+    &lt;paragraph&gt;This is a paragraph of the first section.&lt;/paragraph&gt;
+    &lt;paragraph&gt;This is another paragraph of the first section.&lt;/paragraph&gt;
+  &lt;/section&gt;
+  &lt;section&gt;
+    &lt;paragraph&gt;This is the only paragraph of the second section.&lt;/paragraph&gt;
+  &lt;/section&gt;
+&lt;/document&gt;</PRE
+><P
+>As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<TT
+CLASS="LITERAL"
+>&lt;name-of-the-type-of-the-piece&gt;</TT
+> and ends with
+<TT
+CLASS="LITERAL"
+>&lt;/name-of-the-type-of-the-piece&gt;</TT
+>, and the pieces are
+called <I
+CLASS="EMPHASIS"
+>elements</I
+>. Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply <TT
+CLASS="LITERAL"
+>p</TT
+>, and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;p&gt;First paragraph
+&lt;p&gt;Second paragraph</PRE
+>
+
+This is not possible in XML; continuing our example above we must always write
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;paragraph&gt;First paragraph&lt;/paragraph&gt;
+&lt;paragraph&gt;Second paragraph&lt;/paragraph&gt;</PRE
+>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independent of whether the DTD is known or not.</P
+><P
+>The first line of our sample document,
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;?xml version="1.0" encoding="ISO-8859-1"?&gt;</PRE
+>
+
+is the so-called <I
+CLASS="EMPHASIS"
+>XML declaration</I
+>. It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at the first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>, unlike its
+predecessor "Markup", fully supports Unicode.)</P
+><P
+>The second line,
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!DOCTYPE document SYSTEM "simple.dtd"&gt;</PRE
+>
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!DOCTYPE document SYSTEM "http://host/location/simple.dtd"&gt;</PRE
+>
+
+Please note that <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.</P
+><P
+>The word immediately following <TT
+CLASS="LITERAL"
+>DOCTYPE</TT
+> determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the <I
+CLASS="EMPHASIS"
+>root element</I
+>. In this
+example it is <TT
+CLASS="LITERAL"
+>document</TT
+> because the outermost element is
+delimited by <TT
+CLASS="LITERAL"
+>&lt;document&gt;</TT
+> and
+<TT
+CLASS="LITERAL"
+>&lt;/document&gt;</TT
+>. </P
+><P
+>The DTD consists of three declarations for element types:
+<TT
+CLASS="LITERAL"
+>document</TT
+>, <TT
+CLASS="LITERAL"
+>section</TT
+>, and
+<TT
+CLASS="LITERAL"
+>paragraph</TT
+>. Such a declaration has two parts:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>content-model</I
+></TT
+>&gt;</PRE
+>
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here, <TT
+CLASS="LITERAL"
+>document</TT
+> contains one or
+more sections, and a <TT
+CLASS="LITERAL"
+>section</TT
+> contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the <TT
+CLASS="LITERAL"
+>paragraph</TT
+> element type is declared
+such that parsed character data (indicated by the symbol
+<TT
+CLASS="LITERAL"
+>#PCDATA</TT
+>) is permitted.</P
+><P
+>See below for a detailed discussion of content models. </P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN84"
+>1.1.2. XML parsers and processors</A
+></H2
+><P
+>XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an <I
+CLASS="EMPHASIS"
+>XML parser</I
+>. The parser checks that the document
+is well-formatted, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+<I
+CLASS="EMPHASIS"
+>well-formed</I
+>. Second, the document must match the DTD in
+which case the document is <I
+CLASS="EMPHASIS"
+>valid</I
+>. Many parsers check only
+on well-formedness and ignore the DTD; <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> is designed such that it can
+even validate the document.</P
+><P
+>A parser does not make a sensible application, it only reads XML
+documents. The whole application working with XML-formatted data is called an
+<I
+CLASS="EMPHASIS"
+>XML processor</I
+>. Often XML processors convert documents into
+another format, such as HTML or Postscript. Sometimes processors extract data
+of the documents and output the processed data again XML-formatted. The parser
+can help the application processing the document; for example it can provide
+means to access the document in a specific manner. <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> supports an
+object-oriented access layer specially.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN94"
+>1.1.3. Discussion</A
+></H2
+><P
+>As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:</P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split up into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.</P
+><P
+>XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.</P
+><P
+>Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats usually lack documentation
+because of this. Another solution would be to have a binary representation,
+either as language-dependent or language-independent structure (examples of the
+latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations, one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs has even a good XML mode,
+PSGML). All these alternatives suffer from a missing structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and the advanced parsers like <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> even
+verify that the structure matches the DTD (the so-called validation).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML can be used as a configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project.
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independent of applications. </P
+></LI
+></UL
+></DIV
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x107.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>User's guide</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Highlights of XML</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Using PXP</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="A complete example: The readme DTD"
+HREF="x468.html"><LINK
+REL="NEXT"
+TITLE="How to parse a document from an application"
+HREF="x550.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x468.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x550.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN533"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>2.1. <A
+HREF="c533.html#AEN536"
+>Validation</A
+></DT
+><DT
+>2.2. <A
+HREF="x550.html"
+>How to parse a document from an application</A
+></DT
+><DT
+>2.3. <A
+HREF="x675.html"
+>Class-based processing of the node tree</A
+></DT
+><DT
+>2.4. <A
+HREF="x738.html"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></DT
+></DL
+></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN536"
+>2.1. Validation</A
+></H1
+><P
+>The parser can be used to <I
+CLASS="EMPHASIS"
+>validate</I
+> a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>, i.e. every document is
+validated while it is being parsed.</P
+><P
+>In the <TT
+CLASS="LITERAL"
+>examples</TT
+> directory of the distribution you find the
+<TT
+CLASS="LITERAL"
+>pxpvalidate</TT
+> application. It is invoked in the following way:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>pxpvalidate [ -wf ] <TT
+CLASS="REPLACEABLE"
+><I
+>file</I
+></TT
+>...</PRE
+>
+
+The files mentioned on the command line are validated, and every warning and
+every error message is printed to stderr.</P
+><P
+>The -wf switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>pxpvalidate</TT
+> application is the simplest sensible program
+using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>; you may consider it as a "hello world" program. </P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x468.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x550.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>How to parse a document from an application</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>The objects representing the document</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="PREVIOUS"
+TITLE="Example: An HTML backend for the readme
+DTD"
+HREF="x738.html"><LINK
+REL="NEXT"
+TITLE="The class type node"
+HREF="x939.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="CHAPTER"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x738.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x939.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="CHAPTER"
+><H1
+><A
+NAME="AEN893"
+>Chapter 3. The objects representing the document</A
+></H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>3.1. <A
+HREF="c893.html#AEN897"
+>The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></DT
+><DT
+>3.2. <A
+HREF="x939.html"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></DT
+><DT
+>3.3. <A
+HREF="x1439.html"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></DT
+><DT
+>3.4. <A
+HREF="x1496.html"
+>Details of the mapping from XML text to the tree representation</A
+></DT
+></DL
+></DIV
+><P
+><I
+CLASS="EMPHASIS"
+>This description might be out-of-date. See the module interface files
+for updated information.</I
+></P
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN897"
+>3.1. The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></H1
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] document :
+ Pxp_types.collect_warnings ->
+ object
+ method init_xml_version : string -> unit
+ method init_root : 'ext node -> unit
+
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ method encoding : Pxp_types.rep_encoding
+
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ end
+;;</PRE
+>
+
+The methods beginning with <TT
+CLASS="LITERAL"
+>init_</TT
+> are only for internal use
+of the parser.</P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>xml_version</TT
+>: returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<TT
+CLASS="LITERAL"
+>&lt;?xml version="1.0"?&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>xml_standalone</TT
+>: returns the boolean value of the
+<TT
+CLASS="LITERAL"
+>standalone</TT
+> declaration in the XML declaration. If the
+<TT
+CLASS="LITERAL"
+>standalone</TT
+> attribute is missing, <TT
+CLASS="LITERAL"
+>false</TT
+> is
+returned. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>dtd</TT
+>: returns a reference to the global DTD object.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>root</TT
+>: returns a reference to the root element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding</TT
+>: returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr</TT
+>: returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+<I
+CLASS="EMPHASIS"
+>target</I
+>, and the method returns all instructions with this
+target. The target is the first word inside <TT
+CLASS="LITERAL"
+>&lt;?</TT
+> and
+<TT
+CLASS="LITERAL"
+>?&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr_names</TT
+>: returns the names of the processing instructions.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_pinstr</TT
+>: adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+<TT
+CLASS="LITERAL"
+>pinstr</TT
+>, but you can also enter additional instructions.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>write</TT
+>: writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).</P
+></LI
+></UL
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x738.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x939.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>The PXP user's guide</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="NEXT"
+TITLE="User's guide"
+HREF="p34.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="BOOK"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="BOOK"
+><A
+NAME="AEN1"
+></A
+><DIV
+CLASS="TITLEPAGE"
+><H1
+CLASS="TITLE"
+><A
+NAME="AEN1"
+>The PXP user's guide</A
+></H1
+><H3
+CLASS="AUTHOR"
+>Gerd Stolpmann</H3
+><P
+CLASS="COPYRIGHT"
+>Copyright © 1999, 2000 by <SPAN
+CLASS="HOLDER"
+>Gerd Stolpmann</SPAN
+></P
+><DIV
+><DIV
+CLASS="ABSTRACT"
+><P
+></P
+><P
+><SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><H1
+CLASS="TITLE"
+><A
+NAME="AEN18"
+>Download <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>:</A
+></H1
+>The free <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> library can be downloaded at
+<A
+HREF="http://www.ocaml-programming.de/packages/"
+TARGET="_top"
+>http://www.ocaml-programming.de/packages/</A
+>. This user's guide is included.
+New releases of <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> will be announced in
+<A
+HREF="http://www.npc.de/ocaml/linkdb/"
+TARGET="_top"
+>The OCaml Link
+Database</A
+>.</P
+></DIV
+><P
+></P
+></DIV
+></DIV
+><DIV
+CLASS="LEGALNOTICE"
+><P
+><B
+>License</B
+></P
+><P
+>This document, and the described software, "<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>", are copyright by
+Gerd Stolpmann. </P
+><P
+>Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:</P
+><P
+>The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.</P
+><P
+>The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.</P
+></DIV
+><HR></DIV
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>I. <A
+HREF="p34.html"
+>User's guide</A
+></DT
+><DD
+><DL
+><DT
+>1. <A
+HREF="c36.html"
+>What is XML?</A
+></DT
+><DD
+><DL
+><DT
+>1.1. <A
+HREF="c36.html#AEN38"
+>Introduction</A
+></DT
+><DT
+>1.2. <A
+HREF="x107.html"
+>Highlights of XML</A
+></DT
+><DT
+>1.3. <A
+HREF="x468.html"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></DT
+></DL
+></DD
+><DT
+>2. <A
+HREF="c533.html"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></DT
+><DD
+><DL
+><DT
+>2.1. <A
+HREF="c533.html#AEN536"
+>Validation</A
+></DT
+><DT
+>2.2. <A
+HREF="x550.html"
+>How to parse a document from an application</A
+></DT
+><DT
+>2.3. <A
+HREF="x675.html"
+>Class-based processing of the node tree</A
+></DT
+><DT
+>2.4. <A
+HREF="x738.html"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></DT
+></DL
+></DD
+><DT
+>3. <A
+HREF="c893.html"
+>The objects representing the document</A
+></DT
+><DD
+><DL
+><DT
+>3.1. <A
+HREF="c893.html#AEN897"
+>The <TT
+CLASS="LITERAL"
+>document</TT
+> class</A
+></DT
+><DT
+>3.2. <A
+HREF="x939.html"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></DT
+><DT
+>3.3. <A
+HREF="x1439.html"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></DT
+><DT
+>3.4. <A
+HREF="x1496.html"
+>Details of the mapping from XML text to the tree representation</A
+></DT
+></DL
+></DD
+><DT
+>4. <A
+HREF="c1567.html"
+>Configuring and calling the parser</A
+></DT
+><DD
+><DL
+><DT
+>4.1. <A
+HREF="c1567.html#AEN1569"
+>Overview</A
+></DT
+><DT
+>4.2. <A
+HREF="x1629.html"
+>Resolvers and sources</A
+></DT
+><DT
+>4.3. <A
+HREF="x1812.html"
+>The DTD classes</A
+></DT
+><DT
+>4.4. <A
+HREF="x1818.html"
+>Invoking the parser</A
+></DT
+><DT
+>4.5. <A
+HREF="x1965.html"
+>Updates</A
+></DT
+></DL
+></DD
+></DL
+></DD
+></DL
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+> </TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+> </TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="p34.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+> </TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+> </TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>User's guide</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+.acronym {
+ font-weight: bold;
+ color: #c71585
+}
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>User's guide</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="PREVIOUS"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="NEXT"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="PART"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="index.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c36.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="PART"
+><A
+NAME="AEN34"
+></A
+><DIV
+CLASS="TITLEPAGE"
+><H1
+CLASS="TITLE"
+>I. User's guide</H1
+><DIV
+CLASS="TOC"
+><DL
+><DT
+><B
+>Table of Contents</B
+></DT
+><DT
+>1. <A
+HREF="c36.html"
+>What is XML?</A
+></DT
+><DT
+>2. <A
+HREF="c533.html"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></A
+></DT
+><DT
+>3. <A
+HREF="c893.html"
+>The objects representing the document</A
+></DT
+><DT
+>4. <A
+HREF="c1567.html"
+>Configuring and calling the parser</A
+></DT
+></DL
+></DIV
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="index.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The PXP user's guide</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+> </TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>What is XML?</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Highlights of XML</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="PREVIOUS"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="NEXT"
+TITLE="A complete example: The readme DTD"
+HREF="x468.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c36.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 1. What is XML?</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x468.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN107"
+>1.2. Highlights of XML</A
+></H1
+><P
+>This section explains many, but not all, of the features of XML, and some of
+them only briefly. For a complete description, see the <A
+HREF="http://www.w3.org/TR/1998/REC-xml-19980210.html"
+TARGET="_top"
+>XML
+specification</A
+>.</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN111"
+>1.2.1. The DTD and the instance</A
+></H2
+><P
+>The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?>
+<!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> [
+ <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]></PRE
+>
+
+These declarations are called the <I
+CLASS="EMPHASIS"
+>internal subset</I
+>. Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.</P
+><P
+>If the declarations are located in a different file, you can refer to this file
+as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?>
+<!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"></PRE
+>
+
+The declarations in the file are called the <I
+CLASS="EMPHASIS"
+>external
+subset</I
+>. The file name is called the <I
+CLASS="EMPHASIS"
+>system
+identifier</I
+>.
+It is also possible to refer to the file by a so-called
+<I
+CLASS="EMPHASIS"
+>public identifier</I
+>, but most XML applications won't use
+this feature.</P
+><P
+>You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are mixed, and if there are conflicts, the
+declarations of the internal subset override those of the external subset with
+the same name. This looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?>
+<!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>" [
+ <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]></PRE
+></P
+><P
+>The XML declaration (the string beginning with <TT
+CLASS="LITERAL"
+>&lt;?xml</TT
+> and
+ending at <TT
+CLASS="LITERAL"
+>?&gt;</TT
+>) should specify the encoding of the
+file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.</P
+><P
+>The name of the root element must be mentioned directly after the
+<TT
+CLASS="LITERAL"
+>DOCTYPE</TT
+> string. This means that a full document instance
+looks like
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?xml version="1.0" encoding="<TT
+CLASS="REPLACEABLE"
+><I
+>Your encoding</I
+></TT
+>"?>
+<!DOCTYPE <TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>" [
+ <TT
+CLASS="REPLACEABLE"
+><I
+>Declarations</I
+></TT
+>
+]>
+
+<<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>>
+ <TT
+CLASS="REPLACEABLE"
+><I
+>inner contents</I
+></TT
+>
+</<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>></PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN146"
+>1.2.2. Reserved characters</A
+></H2
+><P
+>Some characters are generally reserved to indicate markup such that they cannot
+be used for character data. These characters are &lt;, &gt;, and
+&amp;. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as a character, write it as follows:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;lt;</TT
+> instead of &lt;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;gt;</TT
+> instead of &gt;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;amp;</TT
+> instead of &amp;</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;apos;</TT
+> instead of '</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>&amp;quot;</TT
+> instead of "</P
+></LI
+></UL
+>
+
+All other characters are free in the document instance. It is possible to
+include a character by its position in the Unicode alphabet:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</PRE
+>
+
+where <TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+> is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&#x<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</PRE
+>
+
+In the scope of declarations, the character % is no longer free. To include it
+as a character, you must use the notations <TT
+CLASS="LITERAL"
+>&amp;#37;</TT
+> or
+<TT
+CLASS="LITERAL"
+>&amp;#x25;</TT
+>.</P
+><P
+>Note that besides &amp;lt;, &amp;gt;, &amp;amp;,
+&amp;apos;, and &amp;quot; there are no predefined character entities. This is
+different from HTML which defines a list of characters that can be referenced
+by name (e.g. &amp;auml; for &auml;); however, if you prefer named characters, you
+can declare such entities yourself (see below).</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN173"
+>1.2.3. Elements and ELEMENT declarations</A
+></H2
+><P
+>Elements structure the document instance in a hierarchical way. There is a
+top-level element, the <I
+CLASS="EMPHASIS"
+>root element</I
+>, which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an <I
+CLASS="EMPHASIS"
+>element
+type</I
+>. The beginning of the element is indicated by a <I
+CLASS="EMPHASIS"
+>start
+tag</I
+>, written
+
+<PRE
+CLASS="PROGRAMLISTING"
+><<TT
+CLASS="REPLACEABLE"
+><I
+>element-type</I
+></TT
+>></PRE
+>
+
+and the element continues until the corresponding <I
+CLASS="EMPHASIS"
+>end tag</I
+>
+is reached:
+
+<PRE
+CLASS="PROGRAMLISTING"
+></<TT
+CLASS="REPLACEABLE"
+><I
+>element-type</I
+></TT
+>></PRE
+>
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules on how to interpret spaces or
+newlines near start or end tags; all spaces and newlines count.</P
+><P
+>Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.</P
+><P
+>An element can simply allow everything as content. This is written:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> ANY></PRE
+>
+
+Conversely, an element can be forced to be empty; this is declared by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> EMPTY></PRE
+>
+
+Note that there is an abbreviated notation for empty element instances:
+<TT
+CLASS="LITERAL"
+><<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>/></TT
+>. </P
+><P
+>There are two more sophisticated forms of declarations: so-called
+<I
+CLASS="EMPHASIS"
+>mixed declarations</I
+>, and <I
+CLASS="EMPHASIS"
+>regular
+expressions</I
+>. An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.</P
+><P
+>A declaration for mixed content looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> (#PCDATA | <TT
+CLASS="REPLACEABLE"
+><I
+>element<SUB
+>1</SUB
+></I
+></TT
+> | ... | <TT
+CLASS="REPLACEABLE"
+><I
+>element<SUB
+>n</SUB
+></I
+></TT
+> )*></PRE
+>
+
+or if you do not want to allow any inner element, simply
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> (#PCDATA)></PRE
+></P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>If element type <TT
+CLASS="LITERAL"
+>q</TT
+> is declared as
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT q (#PCDATA | r | s)*></PRE
+>
+
+this is a legal instance:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><q>This is character data<r></r>with <s></s>inner elements</q></PRE
+>
+
+But this is illegal because <TT
+CLASS="LITERAL"
+>t</TT
+> has not been enumerated in the
+declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><q>This is character data<r></r>with <t></t>inner elements</q></PRE
+></P
+></BLOCKQUOTE
+><P
+>The other form uses a regular expression to describe the possible contents:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>regexp</I
+></TT
+>></PRE
+>
+
+The following well-known regexp operators are allowed:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+></TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>(<TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>1</SUB
+></I
+></TT
+> ,</TT
+> ... <TT
+CLASS="LITERAL"
+>, <TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>n</SUB
+></I
+></TT
+> )</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>(<TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>1</SUB
+></I
+></TT
+> |</TT
+> ... <TT
+CLASS="LITERAL"
+>| <TT
+CLASS="REPLACEABLE"
+><I
+>subexpr<SUB
+>n</SUB
+></I
+></TT
+> )</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>*</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>+</TT
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>?</TT
+></P
+></LI
+></UL
+>
+
+The <TT
+CLASS="LITERAL"
+>,</TT
+> operator indicates a sequence of sub-models, the
+<TT
+CLASS="LITERAL"
+>|</TT
+> operator describes alternative sub-models. The
+<TT
+CLASS="LITERAL"
+>*</TT
+> indicates zero or more repetitions, and
+<TT
+CLASS="LITERAL"
+>+</TT
+> one or more repetitions. Finally, <TT
+CLASS="LITERAL"
+>?</TT
+> can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include <TT
+CLASS="LITERAL"
+>#PCDATA</TT
+>.</P
+><P
+>The exact syntax of the regular expressions is rather strange. This can be
+explained best by a list of constraints:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>The outermost expression must not be
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+></TT
+>. </P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<TT
+CLASS="LITERAL"
+><!ELEMENT x y></TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+><!ELEMENT x (y)></TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>For the unary operators <TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>*</TT
+>,
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>+</TT
+>, and
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+>?</TT
+>, the
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>subexpr</I
+></TT
+></TT
+> must not itself be an
+expression with a unary operator.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<TT
+CLASS="LITERAL"
+><!ELEMENT x y**></TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+><!ELEMENT x (y*)*></TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>Between <TT
+CLASS="LITERAL"
+>)</TT
+> and one of the unary operators
+<TT
+CLASS="LITERAL"
+>*</TT
+>, <TT
+CLASS="LITERAL"
+>+</TT
+>, or <TT
+CLASS="LITERAL"
+>?</TT
+>, there must
+not be whitespace.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<TT
+CLASS="LITERAL"
+><!ELEMENT x (y|z) *></TT
+>; this must be written as
+<TT
+CLASS="LITERAL"
+><!ELEMENT x (y|z)*></TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.</P
+></LI
+></UL
+> </P
+><P
+>Note that there is another restriction on regular expressions: they must be
+deterministic. This means that the parser must be able to see by looking at the
+next token which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML can live without this restriction).</P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>The elements are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT q (r?, (s | t)+)>
+<!ELEMENT r (#PCDATA)>
+<!ELEMENT s EMPTY>
+<!ELEMENT t (q | r)></PRE
+>
+
+This is a legal instance:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><q><r>Some characters</r><s/></q></PRE
+>
+
+(Note: <TT
+CLASS="LITERAL"
+><s/></TT
+> is an abbreviation for
+<TT
+CLASS="LITERAL"
+><s></s></TT
+>.)
+
+It would be illegal to leave <TT
+CLASS="LITERAL"
+><s/></TT
+> out because at
+least one instance of <TT
+CLASS="LITERAL"
+>s</TT
+> or <TT
+CLASS="LITERAL"
+>t</TT
+> must be
+present. It would be illegal, too, if characters existed outside the
+<TT
+CLASS="LITERAL"
+>r</TT
+> element; the only exception is white space. -- This is
+legal, too:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><q><s/><t><q><s/></q></t></q></PRE
+></P
+></BLOCKQUOTE
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN304"
+>1.2.4. Attribute lists and ATTLIST declarations</A
+></H2
+><P
+>Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><<TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute<SUB
+>1</SUB
+></I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>1</SUB
+></I
+></TT
+>" ... <TT
+CLASS="REPLACEABLE"
+><I
+>attribute<SUB
+>n</SUB
+></I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>n</SUB
+></I
+></TT
+>"></PRE
+>
+
+Instead of
+<TT
+CLASS="LITERAL"
+>"<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>k</SUB
+></I
+></TT
+>"</TT
+>
+it is also possible to use single quotes as in
+<TT
+CLASS="LITERAL"
+>'<TT
+CLASS="REPLACEABLE"
+><I
+>value<SUB
+>k</SUB
+></I
+></TT
+>'</TT
+>.
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You can generally not use < and & as characters in attribute
+values. It is possible to include the paraphrases &lt;, &gt;,
+&amp;, &apos;, and &quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;.</P
+><P
+>Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ATTLIST <TT
+CLASS="REPLACEABLE"
+><I
+>element-name</I
+></TT
+>
+ <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-type</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-default</I
+></TT
+>
+ ...
+ <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-name</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-type</I
+></TT
+> <TT
+CLASS="REPLACEABLE"
+><I
+>attribute-default</I
+></TT
+>
+></PRE
+>
+
+There are a lot of types, but most important are:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>CDATA</TT
+>: Every string is allowed as attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>: Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>: A space-separated list of nametokens is allowed as
+attribute value.</P
+></LI
+></UL
+>
+
+The most interesting default declarations are:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>#REQUIRED</TT
+>: The attribute must be specified.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+>: The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>"<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"</TT
+> or
+<TT
+CLASS="LITERAL"
+>'<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>'</TT
+>: This particular value is
+used as default if the attribute is omitted in the element.</P
+></LI
+></UL
+></P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>This is a valid attribute declaration for element type <TT
+CLASS="LITERAL"
+>r</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ATTLIST r
+ x CDATA #REQUIRED
+ y NMTOKEN #IMPLIED
+ z NMTOKENS "one two three"></PRE
+>
+
+This means that <TT
+CLASS="LITERAL"
+>x</TT
+> is a required attribute that cannot be
+left out, while <TT
+CLASS="LITERAL"
+>y</TT
+> and <TT
+CLASS="LITERAL"
+>z</TT
+> are optional. The
+XML parser indicates to the application whether <TT
+CLASS="LITERAL"
+>y</TT
+> is present or
+not, but if <TT
+CLASS="LITERAL"
+>z</TT
+> is missing the default value
+"one two three" is returned automatically. </P
+><P
+>This is a valid example of these attributes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><r x="He said: &quot;I don't like quotes!&quot;" y='1'></PRE
+></P
+></BLOCKQUOTE
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN368"
+>1.2.5. Parsed entities</A
+></H2
+><P
+>Elements describe the logical structure of the document, while
+<I
+CLASS="EMPHASIS"
+>entities</I
+> determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be <I
+CLASS="EMPHASIS"
+>parsed</I
+> in which case the parser reads the text and
+interprets it as XML markup, or <I
+CLASS="EMPHASIS"
+>unparsed</I
+> which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).</P
+><P
+>If the parsed entity is going to be used as part of the DTD, it
+is called a <I
+CLASS="EMPHASIS"
+>parameter entity</I
+>. You can declare a parameter
+entity with a fixed text as content by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> "<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"></PRE
+>
+
+Within the DTD, you can <I
+CLASS="EMPHASIS"
+>refer to</I
+> this entity, i.e. read
+the text of the entity, by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>%<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</PRE
+>
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+<BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>For example, you can declare two elements with the same content model by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % model "a | b | c">
+<!ELEMENT x (%model;)>
+<!ELEMENT y (%model;)></PRE
+> </P
+></BLOCKQUOTE
+>
+
+If the contents of the entity are given as string constant, the entity is
+called an <I
+CLASS="EMPHASIS"
+>internal</I
+> entity. It is also possible to name a
+file to be used as content (an <I
+CLASS="EMPHASIS"
+>external</I
+> entity):
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"></PRE
+>
+
+There are some restrictions for parameter entities:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the internal parameter entity contains the first token of a declaration
+(i.e. <TT
+CLASS="LITERAL"
+><!</TT
+>), it must also contain the last token of the
+declaration, i.e. the <TT
+CLASS="LITERAL"
+>></TT
+>. This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % e "(a | b | c)>">
+<!ELEMENT x %e;</PRE
+> Because <TT
+CLASS="LITERAL"
+><!</TT
+> is contained in the main
+entity, and the corresponding <TT
+CLASS="LITERAL"
+>></TT
+> is contained in the
+entity <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % e "(a | b | c">
+<!ELEMENT x %e;)></PRE
+> Because <TT
+CLASS="LITERAL"
+>(</TT
+> is contained in the entity
+<TT
+CLASS="LITERAL"
+>e</TT
+>, and the corresponding <TT
+CLASS="LITERAL"
+>)</TT
+> is
+contained in the main entity.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Legal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+>
+<!ENTITY % suffix "gif">
+<!ENTITY iconfile 'icon.%suffix;'></PRE
+> Because <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> is referenced within
+the definition text for <TT
+CLASS="LITERAL"
+>iconfile</TT
+>, no additional spaces are
+added.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % suffix "test">
+<!ELEMENT x.%suffix; ANY></PRE
+>
+Because <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> is referenced outside the definition
+text of another entity, the parser replaces <TT
+CLASS="LITERAL"
+>%suffix;</TT
+> by
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>space</I
+></TT
+>test<TT
+CLASS="REPLACEABLE"
+><I
+>space</I
+></TT
+></TT
+>. </P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % e "(a | b | c)">
+<!ELEMENT x %e;*></PRE
+> Because there is whitespace between <TT
+CLASS="LITERAL"
+>)</TT
+>
+and <TT
+CLASS="LITERAL"
+>*</TT
+>, which is illegal.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>An external parameter entity must always consist of a whole number of complete
+declarations.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.</P
+></LI
+></UL
+></P
+><P
+>If the parsed entity is going to be used in the document instance, it is called
+a <I
+CLASS="EMPHASIS"
+>general entity</I
+>. Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> "<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"></PRE
+>
+
+External general entities are declared this way:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY <TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+> SYSTEM "<TT
+CLASS="REPLACEABLE"
+><I
+>file name</I
+></TT
+>"></PRE
+>
+
+References to general entities are written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</PRE
+>
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+<TT
+CLASS="LITERAL"
+>&#38;%name;;</TT
+><A
+NAME="AEN445"
+HREF="#FTN.AEN445"
+>[1]</A
+>.</P
+><P
+>General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.</P
+><BLOCKQUOTE
+CLASS="BLOCKQUOTE"
+><P
+><B
+>Example</B
+></P
+><P
+>If the author of a document changes sometimes, it is worthwhile to set up a
+general entity containing the names of the authors. If the author changes, you
+need only to change the definition of the entity, and do not need to check all
+occurrences of authors' names:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY authors "Gerd Stolpmann"></PRE
+>
+
+In the document text, you can now refer to the author names by writing
+<TT
+CLASS="LITERAL"
+>&authors;</TT
+>.</P
+><P
+><I
+CLASS="EMPHASIS"
+>Illegal:</I
+>
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
+<!ENTITY nonsense "<a></b>"></PRE
+></P
+></BLOCKQUOTE
+><P
+>Earlier in this introduction we explained that there are substitutes for
+reserved characters: &lt;, &gt;, &amp;, &apos;, and
+&quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN463"
+>1.2.6. Notations and unparsed entities</A
+></H2
+><P
+>Unparsed entities have a foreign format and can thus not be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared, such a format is called a
+<I
+CLASS="EMPHASIS"
+>notation</I
+>. The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.</P
+><P
+>As you can see, unparsed entities are too complicated to serve any real
+purpose. It is almost always better to simply pass the name of the data file as
+a normal attribute value, and let the application recognize and process the
+foreign format. </P
+></DIV
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN445"
+HREF="x107.html#AEN445"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.</P
+><P
+>Complete example:
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % variant "a"> <!-- or "b" -->
+<!ENTITY text-a "This is text A.">
+<!ENTITY text-b "This is text B.">
+<!ENTITY text "&#38;text-%variant;;"></PRE
+>
+You can now write <TT
+CLASS="LITERAL"
+>&text;</TT
+> in the document instance, and
+depending on the value of <TT
+CLASS="LITERAL"
+>variant</TT
+> either
+<TT
+CLASS="LITERAL"
+>text-a</TT
+> or <TT
+CLASS="LITERAL"
+>text-b</TT
+> is inserted.</P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x468.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>What is XML?</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>The class type extension</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The class type node"
+HREF="x939.html"><LINK
+REL="NEXT"
+TITLE="Details of the mapping from XML text to the tree representation"
+HREF="x1496.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x939.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1496.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1439"
+>3.3. The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></A
+></H1
+><P
+> <PRE
+CLASS="PROGRAMLISTING"
+>class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end</PRE
+>
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the <TT
+CLASS="LITERAL"
+>node</TT
+> object, but also
+an <TT
+CLASS="LITERAL"
+>extension</TT
+> object. The latter has minimal
+functionality; it has only the necessary methods to be attached to the node
+object containing the details of the node instance. The extension object is
+called extension because its purpose is extensibility.</P
+><P
+>For some reason, it is impossible to derive the
+<TT
+CLASS="LITERAL"
+>node</TT
+> classes (i.e. <TT
+CLASS="LITERAL"
+>element_impl</TT
+> and
+<TT
+CLASS="LITERAL"
+>data_impl</TT
+>) such that the subclasses can be extended by
+new methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing; the cost is
+some notational overhead.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="EXTENSION-GENERAL"
+></A
+><P
+><B
+>Figure 3-6. The structure of nodes and extensions</B
+></P
+><P
+><IMG
+SRC="pic/extension_general.gif"></P
+></DIV
+><P
+>The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods <TT
+CLASS="LITERAL"
+>extension</TT
+> and
+<TT
+CLASS="LITERAL"
+>node</TT
+> follow these references; a typical phrase is
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # attribute "xy"</PRE
+>
+
+to get the value of an attribute from a method defined in the extension object;
+or
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # iter
+ (fun n -> n # extension # my_method ...)</PRE
+>
+
+to iterate over the subnodes and to call <TT
+CLASS="LITERAL"
+>my_method</TT
+> of the
+corresponding extension objects.</P
+><P
+>Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1460"
+>3.3.1. How to define an extension class</A
+></H2
+><P
+>At minimum, you must define the methods
+<TT
+CLASS="LITERAL"
+>clone</TT
+>, <TT
+CLASS="LITERAL"
+>node</TT
+>, and
+<TT
+CLASS="LITERAL"
+>set_node</TT
+> such that your class is compatible with the type
+<TT
+CLASS="LITERAL"
+>extension</TT
+>. The method <TT
+CLASS="LITERAL"
+>set_node</TT
+> is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes <TT
+CLASS="LITERAL"
+>set_node</TT
+> on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of <TT
+CLASS="LITERAL"
+>set_node</TT
+>
+when the <TT
+CLASS="LITERAL"
+>node</TT
+> method is called.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>clone</TT
+> method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred to by the extension, too. Whether this is required depends on the
+application; <TT
+CLASS="LITERAL"
+>clone</TT
+> is invoked by the node object when one of
+its cloning methods is called.</P
+><P
+>A good starting point for an extension class:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ node <- Some n
+
+ end</PRE
+>
+
+This class is compatible with <TT
+CLASS="LITERAL"
+>extension</TT
+>. The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction. </P
+><P
+>Often, you want more than one extension class. In this case,
+the simplest way is that all your classes (for one kind of document) have
+the same type (with respect to the interface; i.e. it does not matter if your
+classes differ in the defined private methods and instance variables, but
+public methods count). This approach avoids lots of coercions and problems with
+type incompatibilities. It is simple to implement:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class virtual custom_extension =
+ object (self)
+ val mutable node = (None : custom_extension node option)
+
+ method clone = ... (* see above *)
+ method node = ... (* see above *)
+ method set_node n = ... (* see above *)
+
+ method virtual my_method1 : ...
+ method virtual my_method2 : ...
+ ... (* etc. *)
+ end
+
+class custom_extension_kind_A =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end
+
+class custom_extension_kind_B =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end</PRE
+>
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g. <TT
+CLASS="LITERAL"
+>assert false</TT
+>).</P
+><P
+>The latter is a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worthwhile.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1481"
+>3.3.2. How to bind extension classes to element types</A
+></H2
+><P
+>Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+<TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+> take a <TT
+CLASS="LITERAL"
+>spec</TT
+> argument which
+can be customized. If your single class has the name <TT
+CLASS="LITERAL"
+>c</TT
+>,
+this argument should be
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist: []
+ ()</PRE
+>
+
+This means that data nodes will be created from the exemplar passed by
+~data_exemplar and that all element nodes will be made from the exemplar
+specified by ~default_element_exemplar. In ~element_alist, you can
+specify that different exemplars are to be used for different element types; but
+this is an optional feature. If you do not need it, pass the empty list.</P
+><P
+>Remember that an exemplar is a (node, extension) pair that serves as pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains <TT
+CLASS="LITERAL"
+>c</TT
+> as
+extension, and when nodes are created, the exemplar is cloned, and cloning
+makes also a copy of <TT
+CLASS="LITERAL"
+>c</TT
+> such that all nodes of the document
+tree will have a copy of <TT
+CLASS="LITERAL"
+>c</TT
+> as extension.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>~element_alist</TT
+> argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist:
+ [ "p", new element_impl c_p;
+ "q", new element_impl c_q;
+ ]
+ ()</PRE
+>
+
+The extension object <TT
+CLASS="LITERAL"
+>c</TT
+> is still used for all data nodes and
+for all other element types.</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x939.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1496.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>node</TT
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Details of the mapping from XML text to the tree representation</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Details of the mapping from XML text to the tree representation</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The class type extension"
+HREF="x1439.html"><LINK
+REL="NEXT"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1439.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c1567.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1496"
+>3.4. Details of the mapping from XML text to the tree representation</A
+></H1
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1498"
+>3.4.1. The representation of character-free elements</A
+></H2
+><P
+>If an element declaration does not allow the element to
+contain character data, the following rules apply.</P
+><P
+>If the element must be empty, i.e. it is declared with the
+keyword <TT
+CLASS="LITERAL"
+>EMPTY</TT
+>, the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared <TT
+CLASS="LITERAL"
+>EMPTY</TT
+> element never contains a data
+node, even if the data node represents the empty string.</P
+><P
+>If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>Consider the following element types:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT x ( #PCDATA | z )* >
+<!ELEMENT y ( z )* >
+<!ELEMENT z EMPTY></PRE
+>
+
+Only <TT
+CLASS="LITERAL"
+>x</TT
+> may contain character data, the keyword
+<TT
+CLASS="LITERAL"
+>#PCDATA</TT
+> indicates this. The other types are character-free. </P
+></DIV
+><P
+>The XML term
+
+<PRE
+CLASS="PROGRAMLISTING"
+><x><z/> <z/></x></PRE
+>
+
+will be internally represented by an element node for <TT
+CLASS="LITERAL"
+>x</TT
+>
+with three subnodes: the first <TT
+CLASS="LITERAL"
+>z</TT
+> element, a data node
+containing the space character, and the second <TT
+CLASS="LITERAL"
+>z</TT
+> element.
+In contrast to this, the term
+
+<PRE
+CLASS="PROGRAMLISTING"
+><y><z/> <z/></y></PRE
+>
+
+is represented by an element node for <TT
+CLASS="LITERAL"
+>y</TT
+> with only
+<I
+CLASS="EMPHASIS"
+>two</I
+> subnodes, the two <TT
+CLASS="LITERAL"
+>z</TT
+> elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element <TT
+CLASS="LITERAL"
+>y</TT
+>.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1521"
+>3.4.2. The representation of character data</A
+></H2
+><P
+>The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type <TT
+CLASS="LITERAL"
+>Pxp_types.rep_encoding</TT
+> enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten - unlikely...)</P
+><P
+>The internal encoding may be different from the external encoding (specified
+in the XML declaration <TT
+CLASS="LITERAL"
+><?xml ... encoding="..."?></TT
+>); in
+this case the strings are automatically converted to the internal encoding.</P
+><P
+>If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the <TT
+CLASS="LITERAL"
+>collect_warning</TT
+>
+object that must be passed when the parser is called).</P
+><P
+>The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.</P
+><P
+>The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.</P
+><P
+>Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1532"
+>3.4.3. The representation of entities within documents</A
+></H2
+><P
+><I
+CLASS="EMPHASIS"
+>Entities are not represented within
+documents!</I
+> If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1536"
+>3.4.4. The representation of attributes</A
+></H2
+><P
+>As attribute
+values are composed of Unicode characters, too, the same problems with the
+character encoding arise as for character material. Attribute values are
+converted to the internal encoding, too; and if there are characters that
+cannot be represented, these are dropped, and a warning is printed.</P
+><P
+>Attribute values are normalized before they are returned by
+methods like <TT
+CLASS="LITERAL"
+>attribute</TT
+>. First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but <TT
+CLASS="LITERAL"
+>&amp;#10;</TT
+> is not converted
+such that it is still possible to include line feeds into attributes).</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1542"
+>3.4.5. The representation of processing instructions</A
+></H2
+><P
+>Processing instructions are parsed to some extent: The first word of the
+PI is called the target, and it is stored separated from the rest of the PI:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?target rest?></PRE
+>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot look up the exact position within the construct.</P
+><P
+>If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+<TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+>. The additional nodes have the node type
+<TT
+CLASS="LITERAL"
+>T_pinstr <TT
+CLASS="REPLACEABLE"
+><I
+>target</I
+></TT
+></TT
+>, and are created
+from special exemplars contained in the <TT
+CLASS="LITERAL"
+>spec</TT
+> (see
+pxp_document.mli).</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1551"
+>3.4.6. The representation of comments</A
+></H2
+><P
+>Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+<TT
+CLASS="LITERAL"
+>T_comment</TT
+> nodes for them. This mode can be specified by the
+option <TT
+CLASS="LITERAL"
+>enable_comment_nodes</TT
+>. Comment nodes are created from
+special exemplars contained in the <TT
+CLASS="LITERAL"
+>spec</TT
+> (see
+pxp_document.mli). You can access the contents of comments through the
+method <TT
+CLASS="LITERAL"
+>comment</TT
+>.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1558"
+>3.4.7. The attributes <TT
+CLASS="LITERAL"
+>xml:lang</TT
+> and
+<TT
+CLASS="LITERAL"
+>xml:space</TT
+></A
+></H2
+><P
+>These attributes are not supported specially; they are handled
+like any other attribute.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1563"
+>3.4.8. And what about namespaces?</A
+></H2
+><P
+>Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so that it is
+possible to implement namespaces on top of the current API.</P
+><P
+>Some future release of PXP will support namespaces as built-in
+feature...</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1439.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Configuring and calling the parser</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Resolvers and sources</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="NEXT"
+TITLE="The DTD classes"
+HREF="x1812.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c1567.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1812.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1629"
+>4.2. Resolvers and sources</A
+></H1
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1631"
+>4.2.1. Using the built-in resolvers (called sources)</A
+></H2
+><P
+>The type <TT
+CLASS="LITERAL"
+>source</TT
+> enumerates the two
+possibilities where the document to parse comes from.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)</PRE
+>
+
+You normally need not worry about this type, as there are convenience
+functions that create <TT
+CLASS="LITERAL"
+>source</TT
+> values:
+
+
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_file s</TT
+>: The document is read from
+file <TT
+CLASS="LITERAL"
+>s</TT
+>; you may specify absolute or relative path names.
+The file name must be encoded as UTF-8 string.</P
+><P
+>There is an optional argument <TT
+CLASS="LITERAL"
+>~system_encoding</TT
+>
+specifying the character encoding which is used for the names of the file
+system. For example, if this encoding is ISO-8859-1 and <TT
+CLASS="LITERAL"
+>s</TT
+> is
+also an ISO-8859-1 string, you can form the source:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
+from_file ~system_encoding:`Enc_iso88591 s_utf8</PRE
+></P
+><P
+>This <TT
+CLASS="LITERAL"
+>source</TT
+> has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> attribute), this
+mode will find that file. However, this mode cannot resolve
+<TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> identifiers nor <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> identifiers
+other than "file:".</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_channel ch</TT
+>: The document is read
+from the channel <TT
+CLASS="LITERAL"
+>ch</TT
+>. In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>from_channel ~id:(System "file:///dir/dir1/") ch</PRE
+>
+
+There is also the ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch</PRE
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>from_string s</TT
+>: The string
+<TT
+CLASS="LITERAL"
+>s</TT
+> is the document to parse. This mode is not able to
+interpret file names of <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> clauses, nor can it look up
+<TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> identifiers. </P
+><P
+>Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let src = from_string ~fixenc:`Enc_iso88592 s</PRE
+></P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>ExtID (id, r)</TT
+>: The document to parse
+is denoted by the identifier <TT
+CLASS="LITERAL"
+>id</TT
+> (either a
+<TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> or <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+> clause), and this
+identifier is interpreted by the resolver <TT
+CLASS="LITERAL"
+>r</TT
+>. Use this mode
+if you have written your own resolver.</P
+><P
+>Which character sets are possible depends on the passed
+resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Entity (get_entity, r)</TT
+>: The document
+to parse is returned by the function invocation <TT
+CLASS="LITERAL"
+>get_entity
+dtd</TT
+>, where <TT
+CLASS="LITERAL"
+>dtd</TT
+> is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+><P
+>Which character sets are possible depends on the passed
+resolver <TT
+CLASS="LITERAL"
+>r</TT
+>.</P
+></LI
+></UL
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1682"
+>4.2.2. The resolver API</A
+></H2
+><P
+>A resolver is an object that can be opened like a file, but you
+do not pass the file name to the resolver, but the XML identifier of the entity
+to read from (either a <TT
+CLASS="LITERAL"
+>SYSTEM</TT
+> or <TT
+CLASS="LITERAL"
+>PUBLIC</TT
+>
+clause). When opened, the resolver must return the
+<TT
+CLASS="LITERAL"
+>Lexing.lexbuf</TT
+> that reads the characters. The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. - The following is from Pxp_reader:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+ object
+ method init_rep_encoding : rep_encoding -> unit
+ method init_warner : collect_warnings -> unit
+ method rep_encoding : rep_encoding
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ method clone : resolver
+ method close_all : unit
+ end</PRE
+>
+
+The resolver object must work as follows:</P
+><P
+> <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+<TT
+CLASS="LITERAL"
+>init_warner</TT
+> and <TT
+CLASS="LITERAL"
+>init_rep_encoding</TT
+>. The
+resolver should store these values. The method <TT
+CLASS="LITERAL"
+>rep_encoding</TT
+>
+should return the internal encoding.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser wants to read from the resolver, it invokes
+the method <TT
+CLASS="LITERAL"
+>open_in</TT
+>. Either the resolver succeeds, in which
+case the <TT
+CLASS="LITERAL"
+>Lexing.lexbuf</TT
+> reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser finishes reading, it calls the
+<TT
+CLASS="LITERAL"
+>close_in</TT
+> method.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If the parser finds a reference to another external
+entity in the input stream, it calls <TT
+CLASS="LITERAL"
+>clone</TT
+> to get a second
+resolver which must be initially closed (not yet connected with an input
+stream). The parser then invokes <TT
+CLASS="LITERAL"
+>open_in</TT
+> and the other
+methods as described.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+<TT
+CLASS="LITERAL"
+>change_encoding</TT
+> as an empty method.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+<TT
+CLASS="LITERAL"
+>open_in</TT
+> has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with <TT
+CLASS="LITERAL"
+>Lexing.from_function</TT
+>; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. If the parser has read the first line of the document, it will invoke
+<TT
+CLASS="LITERAL"
+>change_encoding</TT
+> to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of <TT
+CLASS="LITERAL"
+>change_encoding</TT
+> is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+not any XML declaration or if the declaration does not contain an encoding
+attribute. </P
+><P
+>At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason for
+this is that you otherwise would not exactly know at which position in the
+input stream the character set changes.</P
+><P
+>If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>If an error occurs, the parser calls the method
+<TT
+CLASS="LITERAL"
+>close_all</TT
+> for the top-level resolver; this method should
+close itself (if not already done) and all clones.</P
+></LI
+></UL
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Exceptions. </B
+>It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method <TT
+CLASS="LITERAL"
+>open_in</TT
+> should raise the exception
+<TT
+CLASS="LITERAL"
+>Not_competent</TT
+> to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception <TT
+CLASS="LITERAL"
+>Not_resolvable</TT
+> should be raised
+to force the chain to break.
+ </P
+></DIV
+><P
+>Example: How to define a resolver that is equivalent to
+from_string: ...</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1728"
+>4.2.3. Predefined resolver components</A
+></H2
+><P
+>There are some classes in Pxp_reader that define common resolver behaviour.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_this_channel :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ ?auto_close:bool ->
+ in_channel ->
+ resolver</PRE
+>
+
+Reads from the passed channel (it may be even a pipe). If the
+<TT
+CLASS="LITERAL"
+>~id</TT
+> argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
+been cloned, it does not accept any ID. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below. - If you pass the
+<TT
+CLASS="LITERAL"
+>~fixenc</TT
+> argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+<TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+> (which is the default), the channel is
+closed after use. If <TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is
+left open.
+ </P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver</PRE
+>
+
+This resolver calls the function <TT
+CLASS="LITERAL"
+>~channel_of_id</TT
+> to open a
+new channel for the passed <TT
+CLASS="LITERAL"
+>ext_id</TT
+>. This function must either
+return the channel and the encoding, or it must fail with Not_competent. The
+function must return <TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is already known that the encoding of the
+channel is <TT
+CLASS="LITERAL"
+>e</TT
+>. If <TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+>
+(which is the default), the channel is closed after use. If
+<TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is left open.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver</PRE
+>
+
+When this resolver gets an ID to read from, it calls the function
+<TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path. The
+resolver converts the URL to an absolute URL if necessary. The second
+function, <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>, is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.</P
+><P
+>Both functions, <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> and
+<TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>, can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> is left as it
+is, but a Not_competent from <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+> is converted to
+Not_resolvable. So only <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+> decides which URLs are
+accepted by the resolver and which not.</P
+><P
+>The function <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+> must return
+<TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default mechanism to recognize the
+encoding should be used. It must return <TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is
+already known that the encoding of the channel is <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+><P
+>If <TT
+CLASS="LITERAL"
+>~auto_close = true</TT
+> (which is the default), the channel is
+closed after use. If <TT
+CLASS="LITERAL"
+>~auto_close = false</TT
+>, the channel is
+left open.</P
+><P
+>Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as <TT
+CLASS="LITERAL"
+>~base_url</TT
+> argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_this_string :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ string ->
+ resolver</PRE
+>
+
+Reads from the passed string. If the <TT
+CLASS="LITERAL"
+>~id</TT
+> argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, it does not accept any ID. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the <TT
+CLASS="LITERAL"
+>~fixenc</TT
+> argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) ->
+ resolver</PRE
+>
+
+This resolver calls the function <TT
+CLASS="LITERAL"
+>~string_of_id</TT
+> to get the
+string for the passed <TT
+CLASS="LITERAL"
+>ext_id</TT
+>. This function must either
+return the string and the encoding, or it must fail with Not_competent. The
+function must return <TT
+CLASS="LITERAL"
+>None</TT
+> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<TT
+CLASS="LITERAL"
+>Some e</TT
+> if it is already known that the encoding of the
+string is <TT
+CLASS="LITERAL"
+>e</TT
+>.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver</PRE
+>
+Reads from the local file system. Every file name is interpreted as
+file name of the local file system, and the referred file is read.</P
+><P
+>The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is
+defined in RFC 1738.</P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~file_prefix</TT
+>: Specifies how the "file:" prefix of
+file names is handled:
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Not_recognized:</TT
+> The prefix is not
+recognized.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Allowed:</TT
+> The prefix is allowed but
+not required (the default).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Required:</TT
+> The prefix is
+required.</P
+></LI
+></UL
+></P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~host_prefix:</TT
+> Specifies how the "//host" phrase of
+file names is handled:
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Not_recognized:</TT
+> The prefix is not
+recognized.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Allowed:</TT
+> The prefix is allowed but
+not required (the default).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>`Required:</TT
+> The prefix is
+required.</P
+></LI
+></UL
+></P
+><P
+>Option <TT
+CLASS="LITERAL"
+>~system_encoding:</TT
+> Specifies the encoding of file
+names of the local file system. Default: UTF-8.</P
+><P
+>Options <TT
+CLASS="LITERAL"
+>~url_of_id</TT
+>, <TT
+CLASS="LITERAL"
+>~channel_of_url</TT
+>: Not
+for the casual user!</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class combine :
+ ?prefer:resolver ->
+ resolver list ->
+ resolver</PRE
+>
+
+Combines several resolver objects. If a concrete entity with an
+<TT
+CLASS="LITERAL"
+>ext_id</TT
+> is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).</P
+><P
+>Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and again combined. If the 'clone' method is
+invoked after 'open_in' (i.e. while the resolver is open), additionally the
+clone of the active resolver is flagged as being preferred, i.e. it is tried
+first. </P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1812.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Configuring and calling the parser</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The DTD classes</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>The DTD classes</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Resolvers and sources"
+HREF="x1629.html"><LINK
+REL="NEXT"
+TITLE="Invoking the parser"
+HREF="x1818.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1629.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1818.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1812"
+>4.3. The DTD classes</A
+></H1
+><P
+><I
+CLASS="EMPHASIS"
+>Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd expresses the same:</I
+></P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+> (**********************************************************************)
+(* *)
+(* Pxp_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ (* Creation:
+ * new dtd
+ * creates a new, empty DTD object without any declaration, without a root
+ * element, without an ID.
+ *)
+ Pxp_types.collect_warnings ->
+ Pxp_types.rep_encoding ->
+ object
+ method root : string option
+ (* get the name of the root element if present *)
+
+ method set_root : string -> unit
+ (* set the name of the root element. This method can be invoked
+ * only once
+ *)
+
+ method id : Pxp_types.dtd_id option
+ (* get the identifier for this DTD *)
+
+ method set_id : Pxp_types.dtd_id -> unit
+ (* set the identifier. This method can be invoked only once *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding used for character representation *)
+
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - elements and notations that have not been added may be used in an
+ * arbitrary way; the methods "element" and "notation" indicate this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary contents are allowed or not. *)
+
+ method standalone_declaration : bool
+ (* Whether there is a 'standalone' declaration or not. Strictly
+ * speaking, this declaration is not part of the DTD, but it is
+ * included here because of practical reasons.
+ * If not set, this property defaults to 'false'.
+ *)
+
+ method set_standalone_declaration : bool -> unit
+ (* Sets the 'standalone' declaration. *)
+
+
+ method add_element : dtd_element -> unit
+ (* add the given element declaration to this DTD. Raises Not_found
+ * if there is already an element declaration with the same name.
+ *)
+
+ method add_gen_entity : Pxp_entity.entity -> bool -> unit
+ (* add_gen_entity e extdecl:
+ * add the entity 'e' as general entity to this DTD (general entities
+ * are those represented by &name;). If there is already a declaration
+ * with the same name, the second definition is ignored; as exception from
+ * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+ * may only be redeclared with a definition that is equivalent to the
+ * standard definition; otherwise a Validation_error is raised.
+ *
+ * 'extdecl': 'true' indicates that the entity declaration occurs in
+ * an external entity. (Used for the standalone check.)
+ *)
+
+ method add_par_entity : Pxp_entity.entity -> unit
+ (* add the given entity as parameter entity to this DTD (parameter
+ * entities are those represented by %name;). If there is already a
+ * declaration with the same name, the second definition is ignored.
+ *)
+
+ method add_notation : dtd_notation -> unit
+ (* add the given notation to this DTD. If there is already a declaration
+ * with the same name, a Validation_error is raised.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* add the given processing instruction to this DTD. *)
+
+ method element : string -> dtd_element
+ (* looks up the element declaration with the given name. Raises
+ * Validation_error if the element cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method element_names : string list
+ (* returns the list of the names of all element declarations. *)
+
+ method gen_entity : string -> (Pxp_entity.entity * bool)
+ (* let e, extdecl = obj # gen_entity n:
+ * looks up the general entity 'e' with the name 'n'. Raises
+ * WF_error if the entity cannot be found.
+ * 'extdecl': indicates whether the entity declaration occurred in an
+ * external entity.
+ *)
+
+ method gen_entity_names : string list
+ (* returns the list of all general entity names *)
+
+ method par_entity : string -> Pxp_entity.entity
+ (* looks up the parameter entity with the given name. Raises
+ * WF_error if the entity cannot be found.
+ *)
+
+ method par_entity_names : string list
+ (* returns the list of all parameter entity names *)
+
+ method notation : string -> dtd_notation
+ (* looks up the notation declaration with the given name. Raises
+ * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method notation_names : string list
+ (* Returns the list of the names of all added notations *)
+
+ method pinstr : string -> proc_instruction list
+ (* looks up all processing instructions with the given target.
+ * The "target" is the identifier following "<?".
+ * Note: It is not possible to find out the exact position of the
+ * processing instruction.
+ *)
+
+ method pinstr_names : string list
+ (* Returns the list of the names (targets) of all added pinstrs *)
+
+ method validate : unit
+ (* ensures that the DTD is valid. This method is optimized such that
+ * actual validation is only performed if the DTD has changed.
+ * If the DTD is invalid, mostly a Validation_error is raised,
+ * but other exceptions are possible, too.
+ *)
+
+ method only_deterministic_models : unit
+ (* Succeeds if all regexp content models are deterministic.
+ * Otherwise Validation_error.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+ (* write os enc doctype:
+ * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
+ * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+ * only the declarations are written (the material within the
+ * square brackets).
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (*----------------------------------------*)
+ method invalidate : unit
+ (* INTERNAL METHOD *)
+ method warner : Pxp_types.collect_warnings
+ (* INTERNAL METHOD *)
+ end
+
+
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string ->
+ (* Creation:
+ * new dtd_element init_dtd init_name:
+ * creates a new dtd_element object for init_dtd with init_name.
+ * The strings are represented in the same encoding as init_dtd.
+ *)
+ object
+
+ method name : string
+ (* returns the name of the declared element *)
+
+ method externally_declared : bool
+ (* returns whether the element declaration occurs in an external
+ * entity.
+ *)
+
+ method content_model : Pxp_types.content_model_type
+ (* get the content model of this element declaration, or Unspecified *)
+
+ method content_dfa : Pxp_dfa.dfa_definition option
+ (* return the DFA of the content model if there is a DFA, or None.
+ * A DFA exists only for regexp style content models which are
+ * deterministic.
+ *)
+
+ method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+ (* set_cm_and_extdecl cm extdecl:
+ * set the content model to 'cm'. Once the content model is not
+ * Unspecified, it cannot be set to a different value again.
+ * Furthermore, it is set whether the element occurs in an external
+ * entity ('extdecl').
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Return the encoding of the strings *)
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - attributes that have not been added may be used in an
+ * arbitrary way; the method "attribute" indicates this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary attributes are allowed or not. *)
+
+ method attribute : string ->
+ Pxp_types.att_type * Pxp_types.att_default
+ (* get the type and default value of a declared attribute, or raise
+ * Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_violates_standalone_declaration :
+ string -> string option -> bool
+ (* attribute_violates_standalone_declaration name v:
+ * Checks whether the attribute 'name' violates the "standalone"
+ * declaration if it has value 'v'.
+ * The method returns true if:
+ * - The attribute declaration occurs in an external entity,
+ * and if one of the two conditions holds:
+ * - v = None, and there is a default for the attribute value
+ * - v = Some s, and the type of the attribute is not CDATA,
+ * and s changes if normalized according to the rules of the
+ * attribute type.
+ *
+ * The method raises Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_names : string list
+ (* get the list of all declared attributes *)
+
+ method names_of_required_attributes : string list
+ (* get the list of all attributes that are specified as required
+ * attributes
+ *)
+
+ method id_attribute_name : string option
+ (* Returns the name of the attribute with type ID, or None. *)
+
+ method idref_attribute_names : string list
+ (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+ method add_attribute : string ->
+ Pxp_types.att_type ->
+ Pxp_types.att_default ->
+ bool ->
+ unit
+ (* add_attribute name type default extdecl:
+ * add an attribute declaration for an attribute with the given name,
+ * type, and default value. If there is more than one declaration for
+ * an attribute name, the first declaration counts; the other declarations
+ * are ignored.
+ * 'extdecl': if true, the attribute declaration occurs in an external
+ * entity. This property is used to check the "standalone" attribute.
+ *)
+
+ method validate : unit
+ (* checks whether this element declaration (i.e. the content model and
+ * all attribute declarations) is valid for the associated DTD.
+ * Raises mostly Validation_error if the validation fails.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new dtd_notation a_name an_external_ID init_encoding
+ * creates a new dtd_notation object with the given name and the given
+ * external ID.
+ *)
+ object
+ method name : string
+ method ext_id : Pxp_types.ext_id
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
+ * string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new proc_instruction a_target a_value init_encoding
+ * creates a new proc_instruction object with the given target string and
+ * the given value string.
+ * Note: A processing instruction is written as <?target value?>.
+ *)
+ object
+ method target : string
+ method value : string
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ method parse_pxp_option : (string * string * (string * string) list)
+ (* Parses a PI containing a PXP option. Such PIs are formed like:
+ * <?target option-name option-att="value" option-att="value" ... ?>
+ * The method returns a triple
+ * (target, option-name, [option-att, value; ...])
+ * or raises Error.
+ *)
+
+ end
+
+;; </PRE
+></P
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1629.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1818.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Resolvers and sources</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Invoking the parser</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Invoking the parser</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="The DTD classes"
+HREF="x1812.html"><LINK
+REL="NEXT"
+TITLE="Updates"
+HREF="x1965.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1812.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1965.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1818"
+>4.4. Invoking the parser</A
+></H1
+><P
+>Here is a description of Pxp_yacc.</P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1821"
+>4.4.1. Defaults</A
+></H2
+><P
+>The following defaults are available:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1825"
+>4.4.2. Parsing functions</A
+></H2
+><P
+>In the following, the term "closed document" refers to
+an XML structure like
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!DOCTYPE ... [ <TT
+CLASS="REPLACEABLE"
+><I
+>declarations</I
+></TT
+> ] >
+<<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>>
+...
+</<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>></PRE
+>
+
+The term "fragment" refers to an XML structure like
+
+<PRE
+CLASS="PROGRAMLISTING"
+><<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>>
+...
+</<TT
+CLASS="REPLACEABLE"
+><I
+>root</I
+></TT
+>></PRE
+>
+
+i.e. only to one isolated element instance.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_dtd_entity : config -> source -> dtd</PRE
+>
+
+Parses the declarations which are contained in the entity, and returns them as
+<TT
+CLASS="LITERAL"
+>dtd</TT
+> object.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val extract_dtd_from_document_entity : config -> source -> dtd</PRE
+>
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined to one <TT
+CLASS="LITERAL"
+>dtd</TT
+> object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document</PRE
+>
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+<TT
+CLASS="LITERAL"
+>~transform_dtd</TT
+> can be used to transform the DTD in the
+document, and to use the transformed DTD for validation. If
+<TT
+CLASS="LITERAL"
+>~id_index</TT
+> is specified, an index of all ID attributes is
+created.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_wfdocument_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document</PRE
+>
+
+Parses a closed document, but checks it only for well-formedness.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_content_entity :
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ dtd ->
+ 'ext spec ->
+ 'ext node</PRE
+>
+
+Parses a fragment, and validates the element.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>val parse_wfcontent_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext node</PRE
+>
+
+Parses a fragment, but checks it only for well-formedness.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1851"
+>4.4.3. Configuration options</A
+></H2
+><P
+> <PRE
+CLASS="PROGRAMLISTING"
+>type config =
+ { warner : collect_warnings;
+ errors_with_line_numbers : bool;
+ enable_pinstr_nodes : bool;
+ enable_super_root_node : bool;
+ enable_comment_nodes : bool;
+ encoding : rep_encoding;
+ recognize_standalone_declaration : bool;
+ store_element_positions : bool;
+ idref_pass : bool;
+ validate_by_dfa : bool;
+ accept_only_deterministic_models : bool;
+ ...
+ }</PRE
+>
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>warner:</TT
+>The parser prints
+warnings by invoking the method <TT
+CLASS="LITERAL"
+>warn</TT
+> for this warner
+object. (Default: all warnings are dropped)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>errors_with_line_numbers:</TT
+>If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_pinstr_nodes:</TT
+>If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_super_root_node:</TT
+>If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called super root; it is an element with type
+<TT
+CLASS="LITERAL"
+>T_super_root</TT
+>. - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>enable_comment_nodes:</TT
+>If true,
+the parser creates nodes for comments with type <TT
+CLASS="LITERAL"
+>T_comment</TT
+>;
+if false, such nodes are not created. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding:</TT
+>Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however there are some exceptions (especially
+<TT
+CLASS="LITERAL"
+>ext_id</TT
+> values which are always UTF-8 encoded).
+(Default: `Enc_iso88591)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>recognize_standalone_declaration:</TT
+> If true and if the parser is
+validating, the <TT
+CLASS="LITERAL"
+>standalone="yes"</TT
+> declaration forces that it
+is checked whether the document is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>store_element_positions:</TT
+> If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the <TT
+CLASS="LITERAL"
+>position</TT
+> method.
+(Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>idref_pass:</TT
+>If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>validate_by_dfa:</TT
+>If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>accept_only_deterministic_models:</TT
+> If true, only deterministic content
+models are accepted; if false, any syntactically correct content models can be
+processed. (Default: true)</P
+></LI
+></UL
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1895"
+>4.4.4. Which configuration should I use?</A
+></H2
+><P
+>First, I recommend varying the default configuration instead of
+creating a new configuration record. For instance, to set
+<TT
+CLASS="LITERAL"
+>idref_pass</TT
+> to <TT
+CLASS="LITERAL"
+>true</TT
+>, change the default
+as in:
+<PRE
+CLASS="PROGRAMLISTING"
+>let config = { default_config with idref_pass = true }</PRE
+>
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Do I need extra nodes for processing instructions? </B
+>By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+<PRE
+CLASS="PROGRAMLISTING"
+><x><?pi1?><y/><?pi2?></x> </PRE
+>
+
+will normally create one element node for <TT
+CLASS="LITERAL"
+>x</TT
+> containing
+<I
+CLASS="EMPHASIS"
+>one</I
+> subnode for <TT
+CLASS="LITERAL"
+>y</TT
+>. The processing
+instructions are attached to <TT
+CLASS="LITERAL"
+>x</TT
+> in a separate hash table; you
+can access them using <TT
+CLASS="LITERAL"
+>x # pinstr "pi1"</TT
+> and <TT
+CLASS="LITERAL"
+>x #
+pinstr "pi2"</TT
+>, respectively. The information about where the
+instructions occur within <TT
+CLASS="LITERAL"
+>x</TT
+> is lost.</P
+></DIV
+><P
+>If the option <TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+> is
+turned on, the parser creates extra nodes <TT
+CLASS="LITERAL"
+>pi1</TT
+> and
+<TT
+CLASS="LITERAL"
+>pi2</TT
+> such that the subnodes of <TT
+CLASS="LITERAL"
+>x</TT
+> are now:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>x # sub_nodes = [ pi1; y; pi2 ]</PRE
+>
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using <TT
+CLASS="LITERAL"
+>pi1 # pinstr "pi1"</TT
+> and <TT
+CLASS="LITERAL"
+>pi2 #
+pinstr "pi2"</TT
+>, respectively.</P
+><P
+>Note that you will need an exemplar for the PI nodes (see
+<TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+>).</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Do I need a super root node? </B
+>By default, there is no super root node. The
+<TT
+CLASS="LITERAL"
+>document</TT
+> object refers directly to the node representing the
+root element of the document, i.e.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>doc # root = r</PRE
+>
+
+if <TT
+CLASS="LITERAL"
+>r</TT
+> is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. Because of
+these reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>doc # root = sr &&
+sr # sub_nodes = [ r ]</PRE
+>
+
+When extra nodes are also created for processing instructions, these nodes can
+be added to the super root node if they occur outside the root element (reason
+(3)), and the order reflects the order in the source text.</P
+></DIV
+><P
+>Note that you will need an exemplar for the super root node
+(see <TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+>).</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>What is the effect of the UTF-8 encoding? </B
+>By default, the parser represents strings (with few
+exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
+and fonts for this encoding.</P
+></DIV
+><P
+>However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes >= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (<A
+HREF="http://czyborra.com/unifont/"
+TARGET="_top"
+>GNU Unifont</A
+>,
+<A
+HREF="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz"
+TARGET="_top"
+>Markus Kuhn's fonts</A
+>) and <A
+HREF="http://myweb.clark.net/pub/dickey/xterm/xterm.html"
+TARGET="_top"
+>terminal emulators
+that can handle UTF-8 byte sequences</A
+>. Furthermore, a Unicode editor may
+be helpful (such as <A
+HREF="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/"
+TARGET="_top"
+>Yudit</A
+>). There is
+also a <A
+HREF="http://www.cl.cam.ac.uk/~mgk25/unicode.html"
+TARGET="_top"
+>FAQ</A
+> by
+Markus Kuhn.</P
+><P
+>By setting <TT
+CLASS="LITERAL"
+>encoding</TT
+> to
+<TT
+CLASS="LITERAL"
+>`Enc_utf8</TT
+> all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names. Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1-names.</P
+><P
+>This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and the character set used when writing a document must
+be passed to the <TT
+CLASS="LITERAL"
+>write</TT
+> method.</P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>How do I check that nodes exist which are referred to by IDREF attributes? </B
+>First, you must create an index of all occurring ID
+attributes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let index = new hash_index</PRE
+>
+
+This index must be passed to the parsing function:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>parse_document_entity
+ ~id_index:(index :> index)
+ config source spec</PRE
+>
+
+Next, you must turn on the <TT
+CLASS="LITERAL"
+>idref_pass</TT
+> mode:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let config = { default_config with idref_pass = true }</PRE
+>
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.</P
+></DIV
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>What are deterministic content models? </B
+>This type of model can speed up the validation checks;
+furthermore it ensures SGML-compatibility. In particular, a content model is
+deterministic if the parser can determine the actually used alternative by
+inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT x ((u,v) | (u,y+) | v)></PRE
+>
+
+If the first element in <TT
+CLASS="LITERAL"
+>x</TT
+> is <TT
+CLASS="LITERAL"
+>u</TT
+>, the
+parser does not know which of the alternatives <TT
+CLASS="LITERAL"
+>(u,v)</TT
+> or
+<TT
+CLASS="LITERAL"
+>(u,y+)</TT
+> will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic.</P
+></DIV
+><P
+>The XML standard demands that content models must be
+deterministic. So it is recommended to turn the option
+<TT
+CLASS="LITERAL"
+>accept_only_deterministic_models</TT
+> on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.</P
+><P
+>Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefits, PXP also implements a
+special validator that profits from deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+<TT
+CLASS="LITERAL"
+>validate_by_dfa</TT
+> is turned on.</P
+><P
+>In general, I expect that the DFA method is faster than the
+backtracking method; especially in the worst case the DFA takes only linear
+time. However, if the content model has only few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.</P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1812.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1965.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The DTD classes</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Updates</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Updates</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Configuring and calling the parser"
+HREF="c1567.html"><LINK
+REL="PREVIOUS"
+TITLE="Invoking the parser"
+HREF="x1818.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x1818.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 4. Configuring and calling the parser</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+> </TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN1965"
+>4.5. Updates</A
+></H1
+><P
+><I
+CLASS="EMPHASIS"
+>Some (often later added) features that are otherwise
+not explained in the manual but worth mentioning.</I
+></P
+><P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+>Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+>Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</P
+></LI
+></UL
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x1818.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Invoking the parser</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c1567.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+> </TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>A complete example: The readme DTD</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="What is XML?"
+HREF="c36.html"><LINK
+REL="PREVIOUS"
+TITLE="Highlights of XML"
+HREF="x107.html"><LINK
+REL="NEXT"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x107.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 1. What is XML?</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c533.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="SECT.README.DTD"
+>1.3. A complete example: The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD</A
+></H1
+><P
+>The reason for <I
+CLASS="EMPHASIS"
+>readme</I
+> was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means double the amount of work, and changes
+of one version may be forgotten in the other version. To improve this situation
+I invented the <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD which allows me to maintain only
+one source written as XML document, and to generate the ASCII and the HTML
+version from it.</P
+><P
+>In this section, I explain only the DTD. The <I
+CLASS="EMPHASIS"
+>readme</I
+> DTD is
+contained in the <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> distribution together with the two converters to
+produce ASCII and HTML. Another <A
+HREF="x738.html"
+>section</A
+> of this manual describes the HTML
+converter.</P
+><P
+>The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element has usually the type <TT
+CLASS="LITERAL"
+>readme</TT
+>, it is
+declared by
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+ title CDATA #REQUIRED></PRE
+>
+
+This means that this element contains one or more sections of the first level
+(element type <TT
+CLASS="LITERAL"
+>sect1</TT
+>), and that the element has a required
+attribute <TT
+CLASS="LITERAL"
+>title</TT
+> containing character data (CDATA). Note that
+<TT
+CLASS="LITERAL"
+>readme</TT
+> elements must not contain text data.</P
+><P
+>The three levels of sections are declared as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT sect1 (title,(sect2|p|ul)+)>
+
+<!ELEMENT sect2 (title,(sect3|p|ul)+)>
+
+<!ELEMENT sect3 (title,(p|ul)+)></PRE
+>
+
+Every section has a <TT
+CLASS="LITERAL"
+>title</TT
+> element as first subelement. After
+the title an arbitrary but non-empty sequence of inner sections, paragraphs and
+item lists follows. Note that the inner sections must belong to the next higher
+section level; <TT
+CLASS="LITERAL"
+>sect3</TT
+> elements must not contain inner
+sections because there is no next higher level.</P
+><P
+>Obviously, all three declarations allow paragraphs (<TT
+CLASS="LITERAL"
+>p</TT
+>) and
+item lists (<TT
+CLASS="LITERAL"
+>ul</TT
+>). The definition can be simplified at this
+point by using a parameter entity:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % p.like "p|ul">
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)></PRE
+>
+
+Here, the entity <TT
+CLASS="LITERAL"
+>p.like</TT
+> is nothing but a macro abbreviating
+the same sequence of declarations; if new elements on the same level as
+<TT
+CLASS="LITERAL"
+>p</TT
+> and <TT
+CLASS="LITERAL"
+>ul</TT
+> are later added, it is
+sufficient only to change the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most important, entities
+containing a left parenthesis must also contain the corresponding right
+parenthesis. </P
+><P
+>Note that the entity <TT
+CLASS="LITERAL"
+>p.like</TT
+> is a
+<I
+CLASS="EMPHASIS"
+>parameter</I
+> entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+<TT
+CLASS="LITERAL"
+>%p.like;</TT
+>. This kind of entity must be used to abbreviate
+parts of the DTD; the <I
+CLASS="EMPHASIS"
+>general</I
+> entities declared without
+percent sign and referred to as <TT
+CLASS="LITERAL"
+>&name;</TT
+> are not allowed
+in this context.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>title</TT
+> element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (<TT
+CLASS="LITERAL"
+>br</TT
+>):
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ELEMENT title (#PCDATA|br)*></PRE
+>
+
+Compared with the <TT
+CLASS="LITERAL"
+>title</TT
+> <I
+CLASS="EMPHASIS"
+>attribute</I
+> of
+the <TT
+CLASS="LITERAL"
+>readme</TT
+> element, this element allows inner markup
+(i.e. <TT
+CLASS="LITERAL"
+>br</TT
+>) while attribute values do not: It is an error if
+an attribute value contains the left angle bracket < literally such that it
+is impossible to include inner elements. </P
+><P
+>The paragraph element <TT
+CLASS="LITERAL"
+>p</TT
+> has a structure similar to
+<TT
+CLASS="LITERAL"
+>title</TT
+>, but it allows more inner elements:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT p (#PCDATA|%text;)*></PRE
+>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT br EMPTY&gt;</PRE
+>
+
+This means that really nothing is allowed within <TT
+CLASS="LITERAL"
+>br</TT
+>; you
+must always write <TT
+CLASS="LITERAL"
+>&lt;br&gt;&lt;/br&gt;</TT
+> or abbreviated
+<TT
+CLASS="LITERAL"
+>&lt;br/&gt;</TT
+>.</P
+><P
+>Code samples should be marked up by the <TT
+CLASS="LITERAL"
+>code</TT
+> tag; emphasized
+text can be indicated by <TT
+CLASS="LITERAL"
+>em</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT code (#PCDATA)&gt;
+
+&lt;!ELEMENT em (#PCDATA|%text;)*&gt;</PRE
+>
+
+That <TT
+CLASS="LITERAL"
+>code</TT
+> elements are not allowed to contain further markup
+while <TT
+CLASS="LITERAL"
+>em</TT
+> elements do is a design decision by the author of
+the DTD.</P
+><P
+>Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT ul (li+)&gt;
+
+&lt;!ELEMENT li (%p.like;)*&gt;</PRE
+>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT footnote (#PCDATA|%text;)*&gt;</PRE
+>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the <TT
+CLASS="LITERAL"
+>href</TT
+> attribute is the
+pointer (as URL). There is no way to describe locations of "hash marks". If the
+link refers to another <I
+CLASS="EMPHASIS"
+>readme</I
+> document, the attribute
+<TT
+CLASS="LITERAL"
+>readmeref</TT
+> should be used instead of <TT
+CLASS="LITERAL"
+>href</TT
+>.
+The reason is that the converted document usually has a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ELEMENT a (#PCDATA)*&gt;
+&lt;!ATTLIST a
+ href CDATA #IMPLIED
+ readmeref CDATA #IMPLIED
+></PRE
+>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.</P
+><P
+>So far the DTD. Finally, here is a document for it:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd">
+<readme title="How to use the readme converters">
+<sect1>
+ <title>Usage</title>
+ <p>
+ The <em>readme</em> converter is invoked on the command line by:
+ </p>
+ <p>
+ <code>readme [ -text | -html ] input.xml</code>
+ </p>
+ <p>
+ Here a list of options:
+ </p>
+ <ul>
+ <li>
+ <p><code>-text</code>: specifies that ASCII output should be produced</p>
+ </li>
+ <li>
+ <p><code>-html</code>: specifies that HTML output should be produced</p>
+ </li>
+ </ul>
+ <p>
+ The input file must be given on the command line. The converted output is
+ printed to <em>stdout</em>.
+ </p>
+</sect1>
+<sect1>
+ <title>Author</title>
+ <p>
+ The program has been written by
+ <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
+ </p>
+</sect1>
+</readme></PRE
+> </P
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x107.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Highlights of XML</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c36.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>How to parse a document from an application</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="NEXT"
+TITLE="Class-based processing of the node tree"
+HREF="x675.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c533.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x675.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN550"
+>2.2. How to parse a document from an application</A
+></H1
+><P
+>Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Documents:</I
+> The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class <TT
+CLASS="LITERAL"
+>document</TT
+> contained in the module
+<TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>. You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and most important, the document tree. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The contents of documents:</I
+> The contents have the structure
+of a tree: Elements contain other elements and text<A
+NAME="AEN562"
+HREF="#FTN.AEN562"
+>[1]</A
+>.
+
+The common type to represent both kinds of content is <TT
+CLASS="LITERAL"
+>node</TT
+>
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes always have text
+contents. There are two implementations of <TT
+CLASS="LITERAL"
+>node</TT
+>, the class
+<TT
+CLASS="LITERAL"
+>element_impl</TT
+> for elements, and the class
+<TT
+CLASS="LITERAL"
+>data_impl</TT
+> for text data. You find these classes and class
+types in the module <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>, too.</P
+><P
+>Note that attribute lists are represented by non-class values.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The node extension:</I
+> For advanced usage, every node of the
+document may have an associated <I
+CLASS="EMPHASIS"
+>extension</I
+> which is simply
+a second object. This object must have the three methods
+<TT
+CLASS="LITERAL"
+>clone</TT
+>, <TT
+CLASS="LITERAL"
+>node</TT
+>, and
+<TT
+CLASS="LITERAL"
+>set_node</TT
+> as bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree<A
+NAME="AEN582"
+HREF="#FTN.AEN582"
+>[2]</A
+>. The class type <TT
+CLASS="LITERAL"
+>extension</TT
+> is
+defined in <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>, too.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>The DTD:</I
+> Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+<TT
+CLASS="LITERAL"
+>dtd</TT
+> describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+<TT
+CLASS="LITERAL"
+>dtd_element</TT
+>, <TT
+CLASS="LITERAL"
+>dtd_notation</TT
+>, and
+<TT
+CLASS="LITERAL"
+>proc_instruction</TT
+> can be found in the module
+<TT
+CLASS="LITERAL"
+>Pxp_dtd</TT
+>. There are a couple of classes representing
+different kinds of entities; these can be found in the module
+<TT
+CLASS="LITERAL"
+>Pxp_entity</TT
+>. </P
+></LI
+></UL
+>
+
+Additionally, the following modules play a role:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Pxp_yacc:</I
+> Here the main parsing functions such as
+<TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><I
+CLASS="EMPHASIS"
+>Pxp_types:</I
+> This is a collection of basic types and
+exceptions. </P
+></LI
+></UL
+>
+
+There are some further modules that are needed internally but are not part of
+the API.</P
+><P
+>Let the document to be parsed be stored in a file called
+<TT
+CLASS="LITERAL"
+>doc.xml</TT
+>. The parsing process is started by calling the
+function
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val parse_document_entity : config -> source -> 'ext spec -> 'ext document</PRE
+>
+
+defined in the module <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+>. The first argument
+specifies some global properties of the parser; it is recommended to start with
+the <TT
+CLASS="LITERAL"
+>default_config</TT
+>. The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse <TT
+CLASS="LITERAL"
+>doc.xml</TT
+>, it is sufficient to pass
+<TT
+CLASS="LITERAL"
+>from_file "doc.xml"</TT
+>. </P
+><P
+>The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The <TT
+CLASS="LITERAL"
+>'ext</TT
+>
+polymorphic variable is the type of the extension. For the moment, let us
+simply pass <TT
+CLASS="LITERAL"
+>default_spec</TT
+> as this argument, and ignore it.</P
+><P
+>So the following expression parses <TT
+CLASS="LITERAL"
+>doc.xml</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec</PRE
+>
+
+Note that <TT
+CLASS="LITERAL"
+>default_config</TT
+> implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+<TT
+CLASS="LITERAL"
+>Pxp_types</TT
+>; to get readable errors and warnings catch the
+exceptions as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class warner =
+ object
+ method warn w =
+ print_endline ("WARNING: " ^ w)
+ end
+;;
+
+try
+ let config = { default_config with warner = new warner } in
+ let d = parse_document_entity config (from_file "doc.xml") default_spec
+ in
+ ...
+with
+ e ->
+ print_endline (Pxp_types.string_of_exn e)</PRE
+>
+
+Now <TT
+CLASS="LITERAL"
+>d</TT
+> is an object of the <TT
+CLASS="LITERAL"
+>document</TT
+>
+class. If you want the node tree, you can get the root element by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let root = d # root</PRE
+>
+
+and if you would rather like to access the DTD, determine it by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let dtd = d # dtd</PRE
+>
+
+As it is more interesting, let us investigate the node tree now. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node <TT
+CLASS="LITERAL"
+>n</TT
+> are returned by the method
+<TT
+CLASS="LITERAL"
+>sub_nodes</TT
+>, and the type of a node is returned by
+<TT
+CLASS="LITERAL"
+>node_type</TT
+>. This function traverses the tree, and prints the
+type of each node:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_structure n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element name ->
+ print_endline ("Element of type " ^ name);
+ let children = n # sub_nodes in
+ List.iter print_structure children
+ | T_data ->
+ print_endline "Data"
+ | _ ->
+ (* Other node types are not possible unless the parser is configured
+ differently.
+ *)
+ assert false</PRE
+>
+
+You can call this function by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>print_structure root</PRE
+>
+
+The type returned by <TT
+CLASS="LITERAL"
+>node_type</TT
+> is either <TT
+CLASS="LITERAL"
+>T_element
+name</TT
+> or <TT
+CLASS="LITERAL"
+>T_data</TT
+>. The <TT
+CLASS="LITERAL"
+>name</TT
+> of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.</P
+><P
+>There are some more methods in order to access a parsed node tree:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # parent</TT
+>: Returns the parent node, or raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if the node is already the root</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # root</TT
+>: Returns the root of the node tree. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # attribute a</TT
+>: Returns the value of the attribute with
+name <TT
+CLASS="LITERAL"
+>a</TT
+>. The method returns a value for every
+<I
+CLASS="EMPHASIS"
+>declared</I
+> attribute, independently of whether the attribute
+instance is defined or not. If the attribute is not declared,
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+<TT
+CLASS="LITERAL"
+>CDATA</TT
+>.) </P
+><P
+>The following return values are possible: <TT
+CLASS="LITERAL"
+>Value s</TT
+>,
+<TT
+CLASS="LITERAL"
+>Valuelist sl</TT
+> , and <TT
+CLASS="LITERAL"
+>Implied_value</TT
+>.
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+<TT
+CLASS="LITERAL"
+><TT
+CLASS="REPLACEABLE"
+><I
+>a</I
+></TT
+>="<TT
+CLASS="REPLACEABLE"
+><I
+>value</I
+></TT
+>"</TT
+>
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing, the latter value <TT
+CLASS="LITERAL"
+>Implied_value</TT
+> will be returned.</P
+><P
+>In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+<TT
+CLASS="LITERAL"
+>Value s</TT
+> back, where <TT
+CLASS="LITERAL"
+>s</TT
+> is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as <TT
+CLASS="LITERAL"
+>Valuelist sl</TT
+>.</P
+><P
+>Normalization means that entity references (the
+<TT
+CLASS="LITERAL"
+>&<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</TT
+> tokens) and
+character references
+(<TT
+CLASS="LITERAL"
+>&#<TT
+CLASS="REPLACEABLE"
+><I
+>number</I
+></TT
+>;</TT
+>) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>n # data</TT
+>: Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.</P
+><P
+>Note that entity references included in the text are resolved while they are
+being parsed; for example the text "a &amp;lt;&amp;gt; b" will be returned
+as "a &lt;&gt; b" by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)</P
+><P
+>Note that elements that do <I
+CLASS="EMPHASIS"
+>not</I
+> allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.</P
+></LI
+></UL
+>
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_valuable_prio1 n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ | _ ->
+ assert false</PRE
+>
+
+You can call this function by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>print_valuable_prio1 root</PRE
+>
+
+If you like a DSSSL-like style, you can make the function
+<TT
+CLASS="LITERAL"
+>process_children</TT
+> explicit:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec print_valuable_prio1 n =
+
+ let process_children n =
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ in
+
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ process_children n
+ | _ ->
+ assert false</PRE
+>
+
+So far, O'Caml is now a simple "style-sheet language": You can form a big
+"match" expression to distinguish between all significant cases, and provide
+different reactions on different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail. </P
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN562"
+HREF="x550.html#AEN562"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>Elements may
+also contain processing instructions. Unlike other document models, <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+>
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method <TT
+CLASS="LITERAL"
+>pinstr</TT
+>). However,
+there is a parser option (<TT
+CLASS="LITERAL"
+>enable_pinstr_nodes</TT
+>) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.</P
+><P
+>Furthermore, the tree does normally not contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(<TT
+CLASS="LITERAL"
+>enable_comment_nodes</TT
+>) changing this.</P
+></TD
+></TR
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN582"
+HREF="x550.html#AEN582"
+>[2]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.</P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x675.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Class-based processing of the node tree</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Class-based processing of the node tree</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="How to parse a document from an application"
+HREF="x550.html"><LINK
+REL="NEXT"
+TITLE="Example: An HTML backend for the readme
+DTD"
+HREF="x738.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x550.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x738.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN675"
+>2.3. Class-based processing of the node tree</A
+></H1
+><P
+>By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you want only to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.</P
+><P
+>For example, if the DTD declares the element types <TT
+CLASS="LITERAL"
+>a</TT
+>,
+<TT
+CLASS="LITERAL"
+>b</TT
+>, and <TT
+CLASS="LITERAL"
+>c</TT
+>, and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method <TT
+CLASS="LITERAL"
+>print</TT
+>. The
+classes are <TT
+CLASS="LITERAL"
+>eltype_a</TT
+>, <TT
+CLASS="LITERAL"
+>eltype_b</TT
+>, and
+<TT
+CLASS="LITERAL"
+>eltype_c</TT
+>, and every class implements
+<TT
+CLASS="LITERAL"
+>print</TT
+> such that elements of the type corresponding to the
+class are converted to the output format.</P
+><P
+>The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml<A
+NAME="AEN688"
+HREF="#FTN.AEN688"
+>[1]</A
+>, the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by <I
+CLASS="EMPHASIS"
+>two</I
+> objects, one called
+"the node" and containing the recursive definition of the tree, one called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.</P
+><P
+>Every extension must have the three methods <TT
+CLASS="LITERAL"
+>clone</TT
+>,
+<TT
+CLASS="LITERAL"
+>node</TT
+>, and <TT
+CLASS="LITERAL"
+>set_node</TT
+>. The method
+<TT
+CLASS="LITERAL"
+>clone</TT
+> creates a deep copy of the extension object and
+returns it; <TT
+CLASS="LITERAL"
+>node</TT
+> returns the node object for this extension
+object; and <TT
+CLASS="LITERAL"
+>set_node</TT
+> is used to tell the extension object
+which node is associated with it; this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually <TT
+CLASS="LITERAL"
+>clone</TT
+> must be further refined
+when instance variables are added to the class:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ end</PRE
+>
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider <TT
+CLASS="LITERAL"
+>custom_extension</TT
+> as the super-class of the
+further class definitions. Continuing the example above, we can define the
+element type classes as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class virtual custom_extension =
+ object (self)
+ ... clone, node, set_node defined as above ...
+
+ method virtual print : out_channel -> unit
+ end
+
+class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end</PRE
+>
+
+The method <TT
+CLASS="LITERAL"
+>print</TT
+> can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node</PRE
+>
+
+and you get the extension object of a node <TT
+CLASS="LITERAL"
+>n</TT
+> by writing
+
+<PRE
+CLASS="PROGRAMLISTING"
+>n # extension</PRE
+>
+
+It is guaranteed that
+
+<PRE
+CLASS="PROGRAMLISTING"
+>self # node # extension == self</PRE
+>
+
+always holds.</P
+><P
+>Here are sample definitions of the <TT
+CLASS="LITERAL"
+>print</TT
+>
+methods:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Nodes &lt;a&gt;...&lt;/a&gt; are only containers: *)
+ output_string ch "(";
+ List.iter
+ (fun n -> n # extension # print ch)
+ (self # node # sub_nodes);
+ output_string ch ")";
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the value of the CDATA attribute "print": *)
+ match self # node # attribute "print" with
+ Value s -> output_string ch s
+ | Implied_value -> output_string ch "&lt;missing&gt;"
+ | Valuelist l -> assert false
+ (* not possible because the att is CDATA *)
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the contents of this element: *)
+ output_string ch (self # node # data)
+ end
+
+class null_extension =
+ object (self)
+ inherit custom_extension
+ method print ch = assert false
+ end</PRE
+></P
+><P
+>The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: It is not possible to
+dynamically select the class of an object to be created. As a workaround,
+<SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+> allows the user to specify <I
+CLASS="EMPHASIS"
+>exemplar objects</I
+> for
+the various element types; instead of creating the nodes of the tree by
+applying the <TT
+CLASS="LITERAL"
+>new</TT
+> operator the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which previously an
+exemplar has been registered.</P
+><P
+>Exemplars are meant as objects without contents; the only interesting thing is
+that exemplars are instances of a certain class. The creation of an exemplar
+for an element node can be done by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let element_exemplar = new element_impl extension_exemplar</PRE
+>
+
+And a data node exemplar is created by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let data_exemplar = new data_impl extension_exemplar</PRE
+>
+
+The classes <TT
+CLASS="LITERAL"
+>element_impl</TT
+> and <TT
+CLASS="LITERAL"
+>data_impl</TT
+>
+are defined in the module <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>. The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The <TT
+CLASS="LITERAL"
+>extension_exemplar</TT
+> is the
+initial extension object the exemplars are associated with. </P
+><P
+>Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As a user of the parser you are normally not concerned with this
+as this is part of the internal logic of the parser, but as background knowledge
+it is worthwhile to mention that the two methods
+<TT
+CLASS="LITERAL"
+>create_element</TT
+> and <TT
+CLASS="LITERAL"
+>create_data</TT
+> actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a <TT
+CLASS="LITERAL"
+>clone</TT
+> method.</P
+><P
+>The configuration of the set of exemplars is passed to the
+<TT
+CLASS="LITERAL"
+>parse_document_entity</TT
+> function as third argument. In our
+example, this argument can be set up as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl (new null_extension))
+ ~default_element_exemplar: (new element_impl (new null_extension))
+ ~element_alist:
+ [ "a", new element_impl (new eltype_a);
+ "b", new element_impl (new eltype_b);
+ "c", new element_impl (new eltype_c);
+ ]
+ ()</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>~element_alist</TT
+> function argument defines the mapping
+from element types to exemplars as an associative list. The argument
+<TT
+CLASS="LITERAL"
+>~data_exemplar</TT
+> specifies the exemplar for data nodes, and
+the <TT
+CLASS="LITERAL"
+>~default_element_exemplar</TT
+> is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar. </P
+><P
+>The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let d = parse_document_entity default_config (from_file "doc.xml") spec</PRE
+>
+
+Note that the resulting document <TT
+CLASS="LITERAL"
+>d</TT
+> has a usable type;
+especially the <TT
+CLASS="LITERAL"
+>print</TT
+> method we added is visible. So you can
+print your document by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>d # root # extension # print stdout</PRE
+></P
+><P
+>This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are low. In the next section, one of the examples of the
+distribution is explained, a converter from <I
+CLASS="EMPHASIS"
+>readme</I
+>
+documents to HTML.</P
+></DIV
+><H3
+CLASS="FOOTNOTES"
+>Notes</H3
+><TABLE
+BORDER="0"
+CLASS="FOOTNOTES"
+WIDTH="100%"
+><TR
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="5%"
+><A
+NAME="FTN.AEN688"
+HREF="x675.html#AEN688"
+>[1]</A
+></TD
+><TD
+ALIGN="LEFT"
+VALIGN="TOP"
+WIDTH="95%"
+><P
+>The problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule. </P
+></TD
+></TR
+></TABLE
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x550.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x738.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>How to parse a document from an application</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>Example: An HTML backend for the readme
+DTD</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="Using PXP"
+HREF="c533.html"><LINK
+REL="PREVIOUS"
+TITLE="Class-based processing of the node tree"
+HREF="x675.html"><LINK
+REL="NEXT"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="x675.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 2. Using <SPAN
+CLASS="ACRONYM"
+>PXP</SPAN
+></TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="c893.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="SECT.README.TO-HTML"
+>2.4. Example: An HTML backend for the <I
+CLASS="EMPHASIS"
+>readme</I
+>
+DTD</A
+></H1
+><P
+>The converter from <I
+CLASS="EMPHASIS"
+>readme</I
+> documents to HTML
+documents strictly follows the approach of defining one class per element
+type. The HTML code is similar to the <I
+CLASS="EMPHASIS"
+>readme</I
+> source;
+because of this, most elements can be converted in the following way: Given the
+input element
+
+<PRE
+CLASS="PROGRAMLISTING"
+><e>content</e></PRE
+>
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix. </P
+><P
+>Only one element type cannot be handled by this scheme:
+<TT
+CLASS="LITERAL"
+>footnote</TT
+>. Footnotes are collected while they are found in
+the input text, and they are printed after the main text has been converted and
+printed. </P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN747"
+>2.4.1. Header</A
+></H2
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_types
+open Pxp_document</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN751"
+>2.4.2. Type declarations</A
+></H2
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class type footnote_printer =
+ object
+ method footnote_to_html : store_type -> out_channel -> unit
+ end
+
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : out_channel -> unit
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN755"
+>2.4.3. Class <TT
+CLASS="LITERAL"
+>store</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>store</TT
+> is a container for footnotes. You can add a
+footnote by invoking <TT
+CLASS="LITERAL"
+>alloc_footnote</TT
+>; the argument is an
+object of the class <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+>, the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+> is an object
+with a method <TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>. The class
+<TT
+CLASS="LITERAL"
+>footnote</TT
+> which is defined below has a compatible method
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> such that objects created from it can be
+used as <TT
+CLASS="LITERAL"
+>footnote_printer</TT
+>s.</P
+><P
+>The other method, <TT
+CLASS="LITERAL"
+>print_footnotes</TT
+>, prints the footnotes as a
+definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class store =
+ object (self)
+
+ val mutable footnotes = ( [] : (int * footnote_printer) list )
+ val mutable next_footnote_number = 1
+
+ method alloc_footnote n =
+ let number = next_footnote_number in
+ next_footnote_number <- number+1;
+ footnotes <- footnotes @ [ number, n ];
+ number
+
+ method print_footnotes ch =
+ if footnotes <> [] then begin
+ output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
+ output_string ch "<dl>\n";
+ List.iter
+ (fun (_,n) ->
+ n # footnote_to_html (self : #store_type :> store_type) ch)
+ footnotes;
+ output_string ch "</dl>\n";
+ end
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN772"
+>2.4.4. Function <TT
+CLASS="LITERAL"
+>escape_html</TT
+></A
+></H2
+><P
+>This function converts the characters <, >, &, and " to their HTML
+representation. For example,
+<TT
+CLASS="LITERAL"
+>escape_html "<>" = "&lt;&gt;"</TT
+>. Other
+characters are left unchanged.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let escape_html s =
+ Str.global_substitute
+ (Str.regexp "<\\|>\\|&\\|\"")
+ (fun s ->
+ match Str.matched_string s with
+ "<" -> "&lt;"
+ | ">" -> "&gt;"
+ | "&" -> "&amp;"
+ | "\"" -> "&quot;"
+ | _ -> assert false)
+ s
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN778"
+>2.4.5. Virtual class <TT
+CLASS="LITERAL"
+>shared</TT
+></A
+></H2
+><P
+>This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods <TT
+CLASS="LITERAL"
+>clone</TT
+>,
+<TT
+CLASS="LITERAL"
+>node</TT
+>, and <TT
+CLASS="LITERAL"
+>set_node</TT
+>, and declares the type
+of the virtual method <TT
+CLASS="LITERAL"
+>to_html</TT
+>. This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global <TT
+CLASS="LITERAL"
+>store</TT
+> object which collects the footnotes.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ method virtual to_html : store -> out_channel -> unit
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN788"
+>2.4.6. Class <TT
+CLASS="LITERAL"
+>only_data</TT
+></A
+></H2
+><P
+>This class defines <TT
+CLASS="LITERAL"
+>to_html</TT
+> such that the character data of
+the current node is converted to HTML. Note that <TT
+CLASS="LITERAL"
+>self</TT
+> is an
+extension object, <TT
+CLASS="LITERAL"
+>self # node</TT
+> is the node object, and
+<TT
+CLASS="LITERAL"
+>self # node # data</TT
+> returns the character data of the node.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class only_data =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch (escape_html (self # node # data))
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN797"
+>2.4.7. Class <TT
+CLASS="LITERAL"
+>readme</TT
+></A
+></H2
+><P
+>This class converts elements of type <TT
+CLASS="LITERAL"
+>readme</TT
+> to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the <TT
+CLASS="LITERAL"
+>title</TT
+> attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+<TT
+CLASS="LITERAL"
+>body</TT
+> tag, and the headline have been printed, the contents
+of the page are converted by invoking <TT
+CLASS="LITERAL"
+>to_html</TT
+> on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global <TT
+CLASS="LITERAL"
+>store</TT
+> object to print
+the footnotes. Finally, the end tags of the HTML pages are printed.</P
+><P
+>This class is an example of how to access the value of an attribute: The value is
+determined by invoking <TT
+CLASS="LITERAL"
+>self # node # attribute "title"</TT
+>. As
+this attribute has been declared as CDATA and as being required, the value
+always has the form <TT
+CLASS="LITERAL"
+>Value s</TT
+> where <TT
+CLASS="LITERAL"
+>s</TT
+> is the
+string value of the attribute. </P
+><P
+>You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by <TT
+CLASS="LITERAL"
+>self # node # dtd # par_entity "name"</TT
+>,
+and by invoking <TT
+CLASS="LITERAL"
+>replacement_text</TT
+> the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use <TT
+CLASS="LITERAL"
+>gen_entity</TT
+> instead of
+<TT
+CLASS="LITERAL"
+>par_entity</TT
+> to access general entities.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class readme =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ (* output header *)
+ output_string
+ ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+ output_string
+ ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+ let title =
+ match self # node # attribute "title" with
+ Value s -> s
+ | _ -> assert false
+ in
+ let html_header, _ =
+ try (self # node # dtd # par_entity "readme:html:header")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_trailer, _ =
+ try (self # node # dtd # par_entity "readme:html:trailer")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_bgcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:bgcolor")
+ # replacement_text
+ with WF_error _ -> "white", false in
+ let html_textcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:textcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_alinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:alinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_vlinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_linkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:linkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_background, _ =
+ try (self # node # dtd # par_entity "readme:html:background")
+ # replacement_text
+ with WF_error _ -> "", false in
+
+ output_string ch "<html><header><title>\n";
+ output_string ch (escape_html title);
+ output_string ch "</title></header>\n";
+ output_string ch "<body ";
+ List.iter
+ (fun (name,value) ->
+ if value <> "" then
+ output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+ [ "bgcolor", html_bgcolor;
+ "text", html_textcolor;
+ "link", html_linkcolor;
+ "alink", html_alinkcolor;
+ "vlink", html_vlinkcolor;
+ ];
+ output_string ch ">\n";
+ output_string ch html_header;
+ output_string ch "<h1>";
+ output_string ch (escape_html title);
+ output_string ch "</h1>\n";
+ (* process main content: *)
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ (* now process footnotes *)
+ store # print_footnotes ch;
+ (* trailer *)
+ output_string ch html_trailer;
+ output_string ch "</html>\n";
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN817"
+>2.4.8. Classes <TT
+CLASS="LITERAL"
+>section</TT
+>, <TT
+CLASS="LITERAL"
+>sect1</TT
+>,
+<TT
+CLASS="LITERAL"
+>sect2</TT
+>, and <TT
+CLASS="LITERAL"
+>sect3</TT
+></A
+></H2
+><P
+>As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general <TT
+CLASS="LITERAL"
+>section</TT
+>
+class. The HTML code of the section levels only differs in the type of the
+headline, and because of this the classes describing the section levels can be
+computed by replacing the class argument <TT
+CLASS="LITERAL"
+>the_tag</TT
+> of
+<TT
+CLASS="LITERAL"
+>section</TT
+> by the HTML name of the headline tag.</P
+><P
+>Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a <TT
+CLASS="LITERAL"
+>title</TT
+> element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this
+the title node can be separated from the rest without any checks.</P
+><P
+>Both the title node, and the body nodes are then converted to HTML by calling
+<TT
+CLASS="LITERAL"
+>to_html</TT
+> on them.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class section the_tag =
+ object (self)
+ inherit shared
+
+ val tag = the_tag
+
+ method to_html store ch =
+ let sub_nodes = self # node # sub_nodes in
+ match sub_nodes with
+ title_node :: rest ->
+ output_string ch ("<" ^ tag ^ ">\n");
+ title_node # extension # to_html store ch;
+ output_string ch ("\n</" ^ tag ^ ">");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ rest
+ | _ ->
+ assert false
+ end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN833"
+>2.4.9. Classes <TT
+CLASS="LITERAL"
+>map_tag</TT
+>, <TT
+CLASS="LITERAL"
+>p</TT
+>,
+<TT
+CLASS="LITERAL"
+>em</TT
+>, <TT
+CLASS="LITERAL"
+>ul</TT
+>, <TT
+CLASS="LITERAL"
+>li</TT
+></A
+></H2
+><P
+>Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class <TT
+CLASS="LITERAL"
+>map_tag</TT
+>
+implements this, and the class argument <TT
+CLASS="LITERAL"
+>the_target_tag</TT
+>
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class map_tag the_target_tag =
+ object (self)
+ inherit shared
+
+ val target_tag = the_target_tag
+
+ method to_html store ch =
+ output_string ch ("<" ^ target_tag ^ ">\n");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</" ^ target_tag ^ ">");
+ end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN844"
+>2.4.10. Class <TT
+CLASS="LITERAL"
+>br</TT
+></A
+></H2
+><P
+>Elements of type <TT
+CLASS="LITERAL"
+>br</TT
+> are mapped to the same HTML type. Note
+that HTML forbids the end tag of <TT
+CLASS="LITERAL"
+>br</TT
+>.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class br =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<br>\n";
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN851"
+>2.4.11. Class <TT
+CLASS="LITERAL"
+>code</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>code</TT
+> type is converted to a <TT
+CLASS="LITERAL"
+>pre</TT
+>
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class code =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ let data = self # node # data in
+ (* convert tabs *)
+ let l = String.length data in
+ let rec preprocess i column =
+	(* this is very inefficient but easy to understand: *)
+ if i < l then
+ match data.[i] with
+ '\t' ->
+ let n = 8 - (column mod 8) in
+ String.make n ' ' ^ preprocess (i+1) (column + n)
+ | '\n' ->
+ "\n" ^ preprocess (i+1) 0
+ | c ->
+ String.make 1 c ^ preprocess (i+1) (column + 1)
+ else
+ ""
+ in
+ output_string ch "<p><pre>";
+ output_string ch (escape_html (preprocess 0 0));
+ output_string ch "</pre></p>";
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN858"
+>2.4.12. Class <TT
+CLASS="LITERAL"
+>a</TT
+></A
+></H2
+><P
+>Hyperlinks, expressed by the <TT
+CLASS="LITERAL"
+>a</TT
+> element type, are converted
+to the HTML <TT
+CLASS="LITERAL"
+>a</TT
+> type. If the target of the hyperlink is given
+by <TT
+CLASS="LITERAL"
+>href</TT
+>, the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+<TT
+CLASS="LITERAL"
+>readmeref</TT
+> in which case the ".html" suffix must be added to
+the file name. </P
+><P
+>Note that within <TT
+CLASS="LITERAL"
+>a</TT
+> only #PCDATA is allowed, so the contents
+can be converted directly by applying <TT
+CLASS="LITERAL"
+>escape_html</TT
+> to the
+character data contents.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class a =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<a ";
+ let href =
+ match self # node # attribute "href" with
+ Value v -> escape_html v
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ begin match self # node # attribute "readmeref" with
+ Value v -> escape_html v ^ ".html"
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ ""
+ end
+ in
+ if href <> "" then
+ output_string ch ("href=\"" ^ href ^ "\"");
+ output_string ch ">";
+ output_string ch (escape_html (self # node # data));
+ output_string ch "</a>";
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN870"
+>2.4.13. Class <TT
+CLASS="LITERAL"
+>footnote</TT
+></A
+></H2
+><P
+>The <TT
+CLASS="LITERAL"
+>footnote</TT
+> class has two methods:
+<TT
+CLASS="LITERAL"
+>to_html</TT
+> to convert the footnote reference to HTML, and
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> to convert the footnote text itself.</P
+><P
+>The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.</P
+><P
+>The footnote must be allocated in the <TT
+CLASS="LITERAL"
+>store</TT
+> object. By
+allocating the footnote, you get the number of the footnote, and the text of
+the footnote is stored until the end of the HTML page is reached when the
+footnotes can be printed. The <TT
+CLASS="LITERAL"
+>to_html</TT
+> method simply stores
+the object itself, such that the <TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+> method is
+invoked on the same object that encountered the footnote.</P
+><P
+>The <TT
+CLASS="LITERAL"
+>to_html</TT
+> method only allocates the footnote and prints the
+reference anchor; it neither prints nor converts the contents of the
+note. This is deferred until the footnotes actually get printed, i.e. the
+recursive call of <TT
+CLASS="LITERAL"
+>to_html</TT
+> on the sub nodes is done by
+<TT
+CLASS="LITERAL"
+>footnote_to_html</TT
+>. </P
+><P
+>Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.</P
+><P
+><PRE
+CLASS="PROGRAMLISTING"
+>class footnote =
+ object (self)
+ inherit shared
+
+ val mutable footnote_number = 0
+
+ method to_html store ch =
+ let number =
+ store # alloc_footnote (self : #shared :> footnote_printer) in
+ let foot_anchor =
+ "footnote" ^ string_of_int number in
+ let text_anchor =
+ "textnote" ^ string_of_int number in
+ footnote_number <- number;
+ output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
+ foot_anchor ^ "\">[" ^ string_of_int number ^
+ "]</a>" )
+
+ method footnote_to_html store ch =
+ (* prerequisite: we are in a definition list <dl>...</dl> *)
+ let foot_anchor =
+ "footnote" ^ string_of_int footnote_number in
+ let text_anchor =
+ "textnote" ^ string_of_int footnote_number in
+ output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
+ text_anchor ^ "\">[" ^ string_of_int footnote_number ^
+ "]</a></dt>\n<dd>");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</dd>")
+
+ end
+;;</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN889"
+>2.4.14. The specification of the document model</A
+></H2
+><P
+>This code sets up the hash table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>open Pxp_yacc
+
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new em));
+ "ul", (new element_impl (new ul));
+ "li", (new element_impl (new li));
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a));
+ ]
+ ()
+;;</PRE
+></P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="x675.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>Class-based processing of the node tree</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c533.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The objects representing the document</TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+<HTML
+><HEAD
+><TITLE
+>The class type node</TITLE
+><META
+NAME="GENERATOR"
+CONTENT="Modular DocBook HTML Stylesheet Version 1.46"><LINK
+REL="HOME"
+TITLE="The PXP user's guide"
+HREF="index.html"><LINK
+REL="UP"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="PREVIOUS"
+TITLE="The objects representing the document"
+HREF="c893.html"><LINK
+REL="NEXT"
+TITLE="The class type extension"
+HREF="x1439.html"><LINK
+REL="STYLESHEET"
+TYPE="text/css"
+HREF="markup.css"></HEAD
+><BODY
+CLASS="SECT1"
+BGCOLOR="#FFFFFF"
+TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
+><DIV
+CLASS="NAVHEADER"
+><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TH
+COLSPAN="3"
+ALIGN="center"
+>The PXP user's guide</TH
+></TR
+><TR
+><TD
+WIDTH="10%"
+ALIGN="left"
+VALIGN="bottom"
+><A
+HREF="c893.html"
+>Prev</A
+></TD
+><TD
+WIDTH="80%"
+ALIGN="center"
+VALIGN="bottom"
+>Chapter 3. The objects representing the document</TD
+><TD
+WIDTH="10%"
+ALIGN="right"
+VALIGN="bottom"
+><A
+HREF="x1439.html"
+>Next</A
+></TD
+></TR
+></TABLE
+><HR
+ALIGN="LEFT"
+WIDTH="100%"></DIV
+><DIV
+CLASS="SECT1"
+><H1
+CLASS="SECT1"
+><A
+NAME="AEN939"
+>3.2. The class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></H1
+><P
+> From <TT
+CLASS="LITERAL"
+>Pxp_document</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type node_type =
+ T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+<TT
+CLASS="REPLACEABLE"
+><I
+>and some other, reserved types</I
+></TT
+>
+;;
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ <A
+NAME="TYPE-NODE-GENERAL.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-GENERAL"
+><I
+><I
+>General observers</I
+></I
+></A
+> *)
+
+ method extension : 'ext
+ method dtd : dtd
+ method parent : 'ext node
+ method root : 'ext node
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method node_type : node_type
+ method encoding : Pxp_types.rep_encoding
+ method data : string
+ method position : (string * int * int)
+ method comment : string option
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ <A
+NAME="TYPE-NODE-ATTS.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-ATTS"
+><I
+><I
+>Attribute observers</I
+></I
+></A
+> *)
+
+ method attribute : string -> Pxp_types.att_value
+ method required_string_attribute : string -> string
+ method optional_string_attribute : string -> string option
+ method required_list_attribute : string -> string list
+ method optional_list_attribute : string -> string list
+ method attribute_names : string list
+ method attribute_type : string -> Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string
+
+ <A
+NAME="TYPE-NODE-MODS.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-MODS"
+><I
+><I
+>Modifying methods</I
+></I
+></A
+> *)
+
+ method add_node : ?force:bool -> 'ext node -> unit
+ method add_pinstr : proc_instruction -> unit
+ method delete : unit
+ method set_nodes : 'ext node list -> unit
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ method set_comment : string option -> unit
+
+ <A
+NAME="TYPE-NODE-CLONING.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-CLONING"
+><I
+><I
+>Cloning methods</I
+></I
+></A
+> *)
+
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list ->
+ 'ext node
+ method create_data : dtd -> string -> 'ext node
+ method keep_always_whitespace_mode : unit
+
+ <A
+NAME="TYPE-NODE-WEIRD.SIG"
+></A
+>(* <A
+HREF="x939.html#TYPE-NODE-WEIRD"
+><I
+><I
+>Validating methods</I
+></I
+></A
+> *)
+
+ method local_validate : ?use_dfa:bool -> unit -> unit
+
+ (* ... Internal methods are undocumented. *)
+
+ end
+;;</PRE
+>
+
+In the module <TT
+CLASS="LITERAL"
+>Pxp_types</TT
+> you can find another type
+definition that is important in this context:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+;;</PRE
+></P
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN958"
+>3.2.1. The structure of document trees</A
+></H2
+><P
+>A node represents either an element or a character data section. There are two
+classes implementing the two aspects of nodes: <TT
+CLASS="LITERAL"
+>element_impl</TT
+>
+and <TT
+CLASS="LITERAL"
+>data_impl</TT
+>. The latter class does not implement all
+methods because some methods do not make sense for data nodes.</P
+><P
+>(Note: PXP also supports a mode in which processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of <TT
+CLASS="LITERAL"
+>element_impl</TT
+> with node types
+<TT
+CLASS="LITERAL"
+>T_pinstr</TT
+> and <TT
+CLASS="LITERAL"
+>T_comment</TT
+>,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)</P
+><P
+>The following figure
+(<A
+HREF="x939.html#NODE-TERM"
+><I
+><I
+>A tree with element nodes, data nodes, and attributes</I
+><I
+></I
+></I
+></A
+>) shows an example of how
+a tree is constructed from element and data nodes. The circular areas
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type <TT
+CLASS="LITERAL"
+>node</TT
+>.</P
+><P
+>Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separate access methods for attributes and
+processing instructions.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-TERM"
+></A
+><P
+><B
+>Figure 3-1. A tree with element nodes, data nodes, and attributes</B
+></P
+><P
+><IMG
+SRC="pic/node_term.gif"></P
+></DIV
+><P
+>Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.</P
+><P
+>Note that the parser collapses as much data material into one
+data node as possible such that there are normally never two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So <TT
+CLASS="LITERAL"
+>a &amp; b <!-- comment --> c <![CDATA[
+<> d]]></TT
+> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; it is only the way the parser forms the tree.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-GENERAL"
+></A
+><P
+><B
+>Figure 3-2. Nodes are doubly linked trees</B
+></P
+><P
+><IMG
+SRC="pic/node_general.gif"></P
+></DIV
+><P
+>The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see
+figure <A
+HREF="x939.html#NODE-GENERAL"
+><I
+><I
+>Nodes are doubly linked trees</I
+><I
+></I
+></I
+></A
+>). Obviously,
+this doubly-linked structure simplifies navigation in the tree, but it also
+has some consequences for the possible operations on trees.</P
+><P
+>Because every node must have at most <I
+CLASS="EMPHASIS"
+>one</I
+> parent node,
+operations are illegal if they violate this condition. The following figure
+(<A
+HREF="x939.html#NODE-ADD"
+><I
+><I
+>A node can only be added if it is a root</I
+><I
+></I
+></I
+></A
+>) shows on the left side
+that node <TT
+CLASS="LITERAL"
+>y</TT
+> is added to <TT
+CLASS="LITERAL"
+>x</TT
+> as new subnode
+which is allowed because <TT
+CLASS="LITERAL"
+>y</TT
+> does not have a parent yet. The
+right side of the picture illustrates what would happen if <TT
+CLASS="LITERAL"
+>y</TT
+>
+had a parent node; this is illegal because <TT
+CLASS="LITERAL"
+>y</TT
+> would have two
+parents after the operation.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-ADD"
+></A
+><P
+><B
+>Figure 3-3. A node can only be added if it is a root</B
+></P
+><P
+><IMG
+SRC="pic/node_add.gif"></P
+></DIV
+><P
+>The "delete" operation simply removes the links between two nodes. In the
+picture (<A
+HREF="x939.html#NODE-DELETE"
+><I
+><I
+>A deleted node becomes the root of the subtree</I
+><I
+></I
+></I
+></A
+>) the node
+<TT
+CLASS="LITERAL"
+>x</TT
+> is deleted from the list of subnodes of
+<TT
+CLASS="LITERAL"
+>y</TT
+>. After that, <TT
+CLASS="LITERAL"
+>x</TT
+> becomes the root of the
+subtree starting at this node.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-DELETE"
+></A
+><P
+><B
+>Figure 3-4. A deleted node becomes the root of the subtree</B
+></P
+><P
+><IMG
+SRC="pic/node_delete.gif"></P
+></DIV
+><P
+>It is also possible to make a clone of a subtree; this is illustrated in
+<A
+HREF="x939.html#NODE-CLONE"
+><I
+><I
+>The clone of a subtree</I
+><I
+></I
+></I
+></A
+>. In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called <I
+CLASS="EMPHASIS"
+>orphaned</I
+>.</P
+><DIV
+CLASS="FIGURE"
+><A
+NAME="NODE-CLONE"
+></A
+><P
+><B
+>Figure 3-5. The clone of a subtree</B
+></P
+><P
+><IMG
+SRC="pic/node_clone.gif"></P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1007"
+>3.2.2. The methods of the class type <TT
+CLASS="LITERAL"
+>node</TT
+></A
+></H2
+><A
+NAME="TYPE-NODE-GENERAL"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+> <A
+HREF="x939.html#TYPE-NODE-GENERAL.SIG"
+>General observers</A
+>
+ . </B
+> <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>extension</TT
+>: The reference to the extension object which
+belongs to this node (see ...).</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>dtd</TT
+>: Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>parent</TT
+>: Get the father node. Raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> in the case the node does not have a
+parent, i.e. the node is the root.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>root</TT
+>: Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always
+succeeds. Note that this method <I
+CLASS="EMPHASIS"
+>searches</I
+> the root,
+which costs time proportional to the length of the path to the root.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>sub_nodes</TT
+>: Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>iter_nodes f</TT
+>: Iterates over the children, and calls
+<TT
+CLASS="LITERAL"
+>f</TT
+> for every child in turn. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>iter_nodes_sibl f</TT
+>: Iterates over the children, and calls
+<TT
+CLASS="LITERAL"
+>f</TT
+> for every child in turn. <TT
+CLASS="LITERAL"
+>f</TT
+> gets as
+arguments the previous node, the current node, and the next node.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>node_type</TT
+>: Returns either <TT
+CLASS="LITERAL"
+>T_data</TT
+> which
+means that the node is a data node, or <TT
+CLASS="LITERAL"
+>T_element n</TT
+>
+which means that the node is an element of type <TT
+CLASS="LITERAL"
+>n</TT
+>.
+If configured, possible node types are also <TT
+CLASS="LITERAL"
+>T_pinstr t</TT
+>
+indicating that the node represents a processing instruction with target
+<TT
+CLASS="LITERAL"
+>t</TT
+>, and <TT
+CLASS="LITERAL"
+>T_comment</TT
+> in which case the node
+is a comment.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>encoding</TT
+>: Returns the encoding of the strings.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>data</TT
+>: Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method <TT
+CLASS="LITERAL"
+>encoding</TT
+> returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For <TT
+CLASS="LITERAL"
+>T_pinstr</TT
+> and <TT
+CLASS="LITERAL"
+>T_comment</TT
+>
+nodes, the method returns the empty string.)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>position</TT
+>: If configured, this method returns the position of
+the element as triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+<TT
+CLASS="LITERAL"
+>"?", 0, 0</TT
+> is returned.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>comment</TT
+>: Returns <TT
+CLASS="LITERAL"
+>Some text</TT
+> for comment
+nodes, and <TT
+CLASS="LITERAL"
+>None</TT
+> for other nodes. The <TT
+CLASS="LITERAL"
+>text</TT
+>
+is everything between the comment delimiters <TT
+CLASS="LITERAL"
+>&lt;!--</TT
+> and
+<TT
+CLASS="LITERAL"
+>--&gt;</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr n</TT
+>: Returns all processing instructions that are
+directly contained in this element and that have a <I
+CLASS="EMPHASIS"
+>target</I
+>
+specification of <TT
+CLASS="LITERAL"
+>n</TT
+>. The target is the first word after
+the <TT
+CLASS="LITERAL"
+>&lt;?</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>pinstr_names</TT
+>: Returns the list of all targets of processing
+instructions directly contained in this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>write s enc</TT
+>: Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.</P
+></LI
+></UL
+>
+ </P
+></DIV
+><A
+NAME="TYPE-NODE-ATTS"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+> <A
+HREF="x939.html#TYPE-NODE-ATTS.SIG"
+>Attribute observers</A
+>
+ . </B
+> <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute n</TT
+>: Returns the value of the attribute with name
+<TT
+CLASS="LITERAL"
+>n</TT
+>. This method returns a value for every declared
+attribute, and it raises <TT
+CLASS="LITERAL"
+>Not_found</TT
+> for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as <TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+> or has a default
+value. - Possible values are:
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Implied_value</TT
+>: The attribute has been declared with the
+keyword <TT
+CLASS="LITERAL"
+>#IMPLIED</TT
+>, and the attribute is missing in the
+attribute list of this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Value s</TT
+>: The attribute has been declared as type
+<TT
+CLASS="LITERAL"
+>CDATA</TT
+>, as <TT
+CLASS="LITERAL"
+>ID</TT
+>, as
+<TT
+CLASS="LITERAL"
+>IDREF</TT
+>, as <TT
+CLASS="LITERAL"
+>ENTITY</TT
+>, or as
+<TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>, or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string <TT
+CLASS="LITERAL"
+>s</TT
+>. (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in <TT
+CLASS="LITERAL"
+>s</TT
+>.
+- Summarized, <TT
+CLASS="LITERAL"
+>Value s</TT
+> is returned for non-implied, non-list
+attribute values.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>Valuelist l</TT
+>: The attribute has been declared as type
+<TT
+CLASS="LITERAL"
+>IDREFS</TT
+>, as <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+>, or
+as <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>, and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+<TT
+CLASS="LITERAL"
+>l</TT
+>. (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+<TT
+CLASS="LITERAL"
+>l</TT
+>.
+- Summarized, <TT
+CLASS="LITERAL"
+>Valuelist l</TT
+> is returned for all list-type
+attribute values.</P
+></LI
+></UL
+>
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. <TT
+CLASS="LITERAL"
+>&amp;#<TT
+CLASS="REPLACEABLE"
+><I
+>n</I
+></TT
+>;</TT
+>) and
+general entities
+(i.e. <TT
+CLASS="LITERAL"
+>&amp;<TT
+CLASS="REPLACEABLE"
+><I
+>name</I
+></TT
+>;</TT
+>) are expanded;
+if necessary, expansion is performed recursively.</P
+><P
+>In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is considered as a CDATA
+attribute.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>required_string_attribute n</TT
+>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>optional_string_attribute n</TT
+>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is
+convenient if you expect a non-list attribute value including the implied
+value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>required_list_attribute n</TT
+>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is
+convenient if you expect a list attribute value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>optional_list_attribute n</TT
+>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute_names</TT
+>: returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attribute_type n</TT
+>: returns the type of the attribute called
+<TT
+CLASS="LITERAL"
+>n</TT
+>. See the module <TT
+CLASS="LITERAL"
+>Pxp_types</TT
+> for a
+description of the encoding of the types.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>attributes</TT
+>: returns the list of pairs of names and values
+for all attributes of
+this element.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>id_attribute_name</TT
+>: returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if there is no declared ID attribute for the
+element type.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>id_attribute_value</TT
+>: returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+<TT
+CLASS="LITERAL"
+>Not_found</TT
+> if there is no declared ID attribute for the
+element type.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>idref_attribute_names</TT
+>: returns the list of attribute names
+that are declared as IDREF or IDREFS.</P
+></LI
+></UL
+>
+ </P
+></DIV
+><A
+NAME="TYPE-NODE-MODS"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+> <A
+HREF="x939.html#TYPE-NODE-MODS.SIG"
+>Modifying methods</A
+>
+ . </B
+>The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but always fail).
+
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_node sn</TT
+>: Adds sub node <TT
+CLASS="LITERAL"
+>sn</TT
+> to the list
+of children. This operation is illustrated in the picture
+<A
+HREF="x939.html#NODE-ADD"
+><I
+><I
+>A node can only be added if it is a root</I
+><I
+></I
+></I
+></A
+>. This method expects that
+<TT
+CLASS="LITERAL"
+>sn</TT
+> is a root, and it requires that <TT
+CLASS="LITERAL"
+>sn</TT
+> and
+the current object share the same DTD.</P
+><P
+>Because <TT
+CLASS="LITERAL"
+>add_node</TT
+> is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking <TT
+CLASS="LITERAL"
+>keep_always_whitespace_mode</TT
+>).</P
+><P
+>If the document is flagged as stand-alone, these data nodes only
+containing whitespace are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.</P
+><P
+>If the content model is <TT
+CLASS="LITERAL"
+>EMPTY</TT
+>, it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.</P
+><P
+>These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+<TT
+CLASS="LITERAL"
+>ANY</TT
+> which prohibits any validation check. Furthermore, you can
+turn these checks off by passing <TT
+CLASS="LITERAL"
+>~force:true</TT
+> as first
+argument.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>add_pinstr pi</TT
+>: Adds the processing instruction
+<TT
+CLASS="LITERAL"
+>pi</TT
+> to the list of processing instructions.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>delete</TT
+>: Deletes this node from the tree. After this
+operation, this node is no longer the child of the former father node; and the
+node loses the connection to the father as well. This operation is illustrated
+by the figure <A
+HREF="x939.html#NODE-DELETE"
+><I
+><I
+>A deleted node becomes the root of the subtree</I
+><I
+></I
+></I
+></A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>set_nodes nl</TT
+>: Sets the list of children to
+<TT
+CLASS="LITERAL"
+>nl</TT
+>. It is required that every member of <TT
+CLASS="LITERAL"
+>nl</TT
+>
+is a root, and that all members and the current object share the same DTD.
+Unlike <TT
+CLASS="LITERAL"
+>add_node</TT
+>, no validation checks are performed.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>quick_set_attributes atts</TT
+>: sets the attributes of this
+element to <TT
+CLASS="LITERAL"
+>atts</TT
+>. It is <I
+CLASS="EMPHASIS"
+>not</I
+> checked
+whether <TT
+CLASS="LITERAL"
+>atts</TT
+> matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>set_comment text</TT
+>: This method is only applicable to
+<TT
+CLASS="LITERAL"
+>T_comment</TT
+> nodes; it sets the comment text contained by such
+nodes. </P
+></LI
+></UL
+></P
+></DIV
+><A
+NAME="TYPE-NODE-CLONING"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+> <A
+HREF="x939.html#TYPE-NODE-CLONING.SIG"
+>Cloning methods</A
+>
+ . </B
+> <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>orphaned_clone</TT
+>: Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is <I
+CLASS="EMPHASIS"
+>not</I
+> cloned). While
+copying the subtree, strings are skipped; it is likely that the original tree
+and the copy tree share strings. Extension objects are cloned by invoking
+the <TT
+CLASS="LITERAL"
+>clone</TT
+> method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.</P
+><P
+>This operation is illustrated by the figure
+<A
+HREF="x939.html#NODE-CLONE"
+><I
+><I
+>The clone of a subtree</I
+><I
+></I
+></I
+></A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>orphaned_flat_clone</TT
+>: Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><A
+NAME="TYPE-NODE-METH-CREATE-ELEMENT"
+></A
+>
+<TT
+CLASS="LITERAL"
+>create_element dtd nt al</TT
+>: Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+<TT
+CLASS="LITERAL"
+>dtd</TT
+>; the node type is set to <TT
+CLASS="LITERAL"
+>nt</TT
+>, and the
+new attribute list is set to <TT
+CLASS="LITERAL"
+>al</TT
+> (given as list of
+(name,value) pairs). The copy does not have children nor a parent. It does not
+contain processing instructions. See
+<A
+HREF="x939.html#TYPE-NODE-EX-CREATE-ELEMENT"
+>the example below</A
+>.</P
+><P
+>Note that you can specify the position of the new node
+by the optional argument <TT
+CLASS="LITERAL"
+>~position</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><A
+NAME="TYPE-NODE-METH-CREATE-DATA"
+></A
+>
+<TT
+CLASS="LITERAL"
+>create_data dtd cdata</TT
+>: Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+<TT
+CLASS="LITERAL"
+>dtd</TT
+>; the node type is set to <TT
+CLASS="LITERAL"
+>T_data</TT
+>; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value <TT
+CLASS="LITERAL"
+>cdata</TT
+> is the new character content of the
+node. See
+<A
+HREF="x939.html#TYPE-NODE-EX-CREATE-DATA"
+>the example below</A
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>keep_always_whitespace_mode</TT
+>: Even data nodes which are
+normally dropped because they only contain ignorable whitespace, can be added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)</P
+></LI
+></UL
+></P
+></DIV
+><A
+NAME="TYPE-NODE-WEIRD"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+> <A
+HREF="x939.html#TYPE-NODE-WEIRD.SIG"
+>Validating methods</A
+>
+ . </B
+>There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+ <P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>local_validate</TT
+>: Checks that this node conforms to the
+DTD by comparing the type of the subnodes with the content model for this
+node. (Applications need not call this method unless they add new nodes
+themselves to the tree.)</P
+></LI
+></UL
+></P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1252"
+>3.2.3. The class <TT
+CLASS="LITERAL"
+>element_impl</TT
+></A
+></H2
+><P
+>This class is an implementation of <TT
+CLASS="LITERAL"
+>node</TT
+> which
+realizes element nodes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] element_impl : 'ext -> [ 'ext ] node</PRE
+> </P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Constructor. </B
+>You can create a new instance by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>new element_impl <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+></PRE
+>
+
+which creates a special form of empty element which already contains a
+reference to the <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+>, but is
+otherwise empty. This special form is called an
+<I
+CLASS="EMPHASIS"
+>exemplar</I
+>. The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+<A
+HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
+><TT
+CLASS="LITERAL"
+>create_element</TT
+></A
+> is designed to perform this action.</P
+></DIV
+><A
+NAME="TYPE-NODE-EX-CREATE-ELEMENT"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>First, create an exemplar by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... in
+let exemplar = new element_impl exemplar_ext in</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>exemplar</TT
+> is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let element = exemplar # <A
+HREF="x939.html#TYPE-NODE-METH-CREATE-ELEMENT"
+>create_element</A
+> dtd (T_element name) attlist </PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>element</TT
+> is a copy of <TT
+CLASS="LITERAL"
+>exemplar</TT
+>
+(even the extension <TT
+CLASS="LITERAL"
+>exemplar_ext</TT
+> has been copied)
+which ensures that <TT
+CLASS="LITERAL"
+>element</TT
+> and its extension are objects
+of the same class as the exemplars; note that you need not pass a
+class name or other meta information. The copy is initially connected
+with the <TT
+CLASS="LITERAL"
+>dtd</TT
+>, it gets a node type, and the attribute list
+is filled. The <TT
+CLASS="LITERAL"
+>element</TT
+> is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.</P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1281"
+>3.2.4. The class <TT
+CLASS="LITERAL"
+>data_impl</TT
+></A
+></H2
+><P
+>This class is an implementation of <TT
+CLASS="LITERAL"
+>node</TT
+> which
+should be used for all character data nodes:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>class [ 'ext ] data_impl : 'ext -> [ 'ext ] node</PRE
+> </P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Constructor. </B
+>You can create a new instance by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>new data_impl <TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+></PRE
+>
+
+which creates an empty exemplar node which is connected to
+<TT
+CLASS="REPLACEABLE"
+><I
+>extension_object</I
+></TT
+>. The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.</P
+></DIV
+><P
+>To get a fully working data node, apply the method
+<A
+HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
+><TT
+CLASS="LITERAL"
+>create_data</TT
+></A
+> to the exemplar (see example).</P
+><A
+NAME="TYPE-NODE-EX-CREATE-DATA"
+></A
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Example. </B
+>First, create an exemplar by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... in
+let exemplar = new data_impl exemplar_ext in</PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>exemplar</TT
+> is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let data_node = exemplar # <A
+HREF="x939.html#TYPE-NODE-METH-CREATE-DATA"
+>create_data</A
+> dtd "The characters contained in the data node" </PRE
+>
+
+The <TT
+CLASS="LITERAL"
+>data_node</TT
+> is a copy of <TT
+CLASS="LITERAL"
+>exemplar</TT
+>.
+The copy is initially connected
+with the <TT
+CLASS="LITERAL"
+>dtd</TT
+>, and it is filled with character material.
+The <TT
+CLASS="LITERAL"
+>data_node</TT
+> is now fully functional; it can
+be added to an element as child.</P
+></DIV
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1308"
+>3.2.5. The type <TT
+CLASS="LITERAL"
+>spec</TT
+></A
+></H2
+><P
+>The type <TT
+CLASS="LITERAL"
+>spec</TT
+> defines a way to handle the details of
+creating nodes from exemplars.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec</PRE
+>
+
+The two functions <TT
+CLASS="LITERAL"
+>make_spec_from_mapping</TT
+> and
+<TT
+CLASS="LITERAL"
+>make_spec_from_alist</TT
+> create <TT
+CLASS="LITERAL"
+>spec</TT
+>
+values. Both functions are functionally equivalent and the only difference is
+that the first function prefers hashtables and the latter associative lists to
+describe mappings from names to exemplars.</P
+><P
+>You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+
+<P
+></P
+><UL
+COMPACT="COMPACT"
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~super_root_exemplar</TT
+>: This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~comment_exemplar</TT
+>: This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~default_pinstr_exemplar</TT
+>: If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by <TT
+CLASS="LITERAL"
+>~pinstr_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~pinstr_alist</TT
+>, this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all. </P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~pinstr_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~pinstr_alist</TT
+>: Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~data_exemplar</TT
+>: The exemplar for
+ordinary data nodes.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~default_element_exemplar</TT
+>: This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables <TT
+CLASS="LITERAL"
+>element_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>element_alist</TT
+>.</P
+></LI
+><LI
+STYLE="list-style-type: disc"
+><P
+><TT
+CLASS="LITERAL"
+>~element_mapping</TT
+> or
+<TT
+CLASS="LITERAL"
+>~element_alist</TT
+>: Map the element types to exemplars. These
+mappings are used to create element nodes.</P
+></LI
+></UL
+>
+
+In most cases, you only want to create <TT
+CLASS="LITERAL"
+>spec</TT
+> values to pass
+them to the parser functions found in <TT
+CLASS="LITERAL"
+>Pxp_yacc</TT
+>. However, it
+might be useful to apply <TT
+CLASS="LITERAL"
+>spec</TT
+> values directly.</P
+><P
+>The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed <TT
+CLASS="LITERAL"
+>spec</TT
+> value, and by
+calling <TT
+CLASS="LITERAL"
+>create_element</TT
+> or <TT
+CLASS="LITERAL"
+>create_data</TT
+> on
+the exemplar.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val create_data_node :
+ 'ext spec ->
+ dtd ->
+ (* data material: *) string ->
+ 'ext node
+
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* element type: *) string ->
+ (* attributes: *) (string * string) list ->
+ 'ext node
+
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ 'ext node
+
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* comment text: *) string ->
+ 'ext node
+
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ proc_instruction ->
+ 'ext node</PRE
+></P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1354"
+>3.2.6. Examples</A
+></H2
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Building trees. </B
+>Here is the piece of code that creates the tree of
+the figure <A
+HREF="x939.html#NODE-TERM"
+><I
+><I
+>A tree with element nodes, data nodes, and attributes</I
+><I
+></I
+></I
+></A
+>. The extension
+object and the DTD are beyond the scope of this example.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar = new data_impl exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;</PRE
+>
+
+Alternatively, the last block of statements could also be written as:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];</PRE
+>
+
+The root of the tree is <TT
+CLASS="LITERAL"
+>a1</TT
+>, i.e. it is true that
+
+<PRE
+CLASS="PROGRAMLISTING"
+>x # root == a1</PRE
+>
+
+for every x from { <TT
+CLASS="LITERAL"
+>a1</TT
+>, <TT
+CLASS="LITERAL"
+>a2</TT
+>,
+<TT
+CLASS="LITERAL"
+>b1</TT
+>, <TT
+CLASS="LITERAL"
+>c1</TT
+>, <TT
+CLASS="LITERAL"
+>cherries</TT
+>,
+<TT
+CLASS="LITERAL"
+>orange</TT
+> }.</P
+></DIV
+><P
+>Furthermore, the following properties hold:
+
+<PRE
+CLASS="PROGRAMLISTING"
+> a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+& orange # data = "An orange"
+& a1 # data = "CherriesAn orange"
+
+& a1 # node_type = T_element "a"
+& a2 # node_type = T_element "a"
+& b1 # node_type = T_element "b"
+& c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+& orange # node_type = T_data
+
+& a1 # sub_nodes = [ b1; c1 ]
+& a2 # sub_nodes = [ orange ]
+& b1 # sub_nodes = [ a2; cherries ]
+& c1 # sub_nodes = []
+& cherries # sub_nodes = []
+& orange # sub_nodes = []
+
+& a2 # parent == b1
+& b1 # parent == a1
+& c1 # parent == a1
+& cherries # parent == b1
+& orange # parent == a2</PRE
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Searching nodes. </B
+>The following function searches all nodes of a tree
+for which a certain condition holds:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let rec search p t =
+ if p t then
+ t :: search_list p (t # sub_nodes)
+ else
+ search_list p (t # sub_nodes)
+
+and search_list p l =
+ match l with
+ [] -> []
+ | t :: l' -> (search p t) @ (search_list p l')
+;;</PRE
+></P
+></DIV
+><P
+>For example, if you want to search for all elements of a certain
+type <TT
+CLASS="LITERAL"
+>et</TT
+>, the function <TT
+CLASS="LITERAL"
+>search</TT
+> can be
+applied as follows:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let search_element_type et t =
+ search (fun x -> x # node_type = T_element et) t
+;;</PRE
+></P
+><DIV
+CLASS="FORMALPARA"
+><P
+><B
+>Getting attribute values. </B
+>Suppose we have the declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>&lt;!ATTLIST e a CDATA #REQUIRED
+ b CDATA #IMPLIED
+ c CDATA "12345"></PRE
+>
+
+In this case, every element <TT
+CLASS="LITERAL"
+>e</TT
+> must have an attribute
+<TT
+CLASS="LITERAL"
+>a</TT
+>, otherwise the parser would indicate an error. If
+the O'Caml variable <TT
+CLASS="LITERAL"
+>n</TT
+> holds the node of the tree
+corresponding to the element, you can get the value of the attribute
+<TT
+CLASS="LITERAL"
+>a</TT
+> by
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_a = n # required_string_attribute "a"</PRE
+>
+
+which is more or less an abbreviation for
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_a =
+ match n # attribute "a" with
+ Value s -> s
+ | _ -> assert false</PRE
+>
+
+- as the attribute is required, the <TT
+CLASS="LITERAL"
+>attribute</TT
+> method always
+returns a <TT
+CLASS="LITERAL"
+>Value</TT
+>.</P
+></DIV
+><P
+>In contrast to this, the attribute <TT
+CLASS="LITERAL"
+>b</TT
+> can be
+omitted. In this case, the method <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+>
+works only if the attribute is there, and the method will fail if the attribute
+is missing. To get the value, you can apply the method
+<TT
+CLASS="LITERAL"
+>optional_string_attribute</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_b = n # optional_string_attribute "b"</PRE
+>
+
+Now, <TT
+CLASS="LITERAL"
+>value_of_b</TT
+> is of type <TT
+CLASS="LITERAL"
+>string option</TT
+>,
+and <TT
+CLASS="LITERAL"
+>None</TT
+> represents the omitted attribute. Alternatively,
+you could also use <TT
+CLASS="LITERAL"
+>attribute</TT
+>:
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_b =
+ match n # attribute "b" with
+ Value s -> Some s
+ | Implied_value -> None
+ | _ -> assert false</PRE
+></P
+><P
+>The attribute <TT
+CLASS="LITERAL"
+>c</TT
+> behaves much like
+<TT
+CLASS="LITERAL"
+>a</TT
+>, because it always has a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+> to get the
+value.</P
+><P
+>The type <TT
+CLASS="LITERAL"
+>CDATA</TT
+> is the most general string
+type. The types <TT
+CLASS="LITERAL"
+>NMTOKEN</TT
+>, <TT
+CLASS="LITERAL"
+>ID</TT
+>,
+<TT
+CLASS="LITERAL"
+>IDREF</TT
+>, <TT
+CLASS="LITERAL"
+>ENTITY</TT
+>, and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like <TT
+CLASS="LITERAL"
+>CDATA</TT
+>, i.e. you can
+use the methods <TT
+CLASS="LITERAL"
+>required_string_attribute</TT
+> and
+<TT
+CLASS="LITERAL"
+>optional_string_attribute</TT
+>, too.</P
+><P
+>In contrast to this, the types <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+>,
+<TT
+CLASS="LITERAL"
+>IDREFS</TT
+>, and <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+> mean lists of
+strings. Suppose we have the declaration:
+
+<PRE
+CLASS="PROGRAMLISTING"
+><!ATTLIST f d NMTOKENS #REQUIRED
+ e NMTOKENS #IMPLIED></PRE
+>
+
+The type <TT
+CLASS="LITERAL"
+>NMTOKENS</TT
+> stands for lists of space-separated
+tokens; for example the value <TT
+CLASS="LITERAL"
+>"1 abc 23ef"</TT
+> means the list
+<TT
+CLASS="LITERAL"
+>["1"; "abc"; "23ef"]</TT
+>. (Again, <TT
+CLASS="LITERAL"
+>IDREFS</TT
+>
+and <TT
+CLASS="LITERAL"
+>ENTITIES</TT
+> have more restricted values.) To get the
+value of attribute <TT
+CLASS="LITERAL"
+>d</TT
+>, one can use
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_d = n # required_list_attribute "d"</PRE
+>
+
+or
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_d =
+ match n # attribute "d" with
+ Valuelist l -> l
+ | _ -> assert false</PRE
+>
+
+As <TT
+CLASS="LITERAL"
+>d</TT
+> is required, the attribute cannot be omitted, and
+the <TT
+CLASS="LITERAL"
+>attribute</TT
+> method always returns a
+<TT
+CLASS="LITERAL"
+>Valuelist</TT
+>. </P
+><P
+>For optional attributes like <TT
+CLASS="LITERAL"
+>e</TT
+>, apply
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_e = n # optional_list_attribute "e"</PRE
+>
+
+or
+
+<PRE
+CLASS="PROGRAMLISTING"
+>let value_of_e =
+ match n # attribute "e" with
+ Valuelist l -> l
+ | Implied_value -> []
+ | _ -> assert false</PRE
+>
+
+Here, a missing attribute is treated like the empty list.</P
+></DIV
+><DIV
+CLASS="SECT2"
+><H2
+CLASS="SECT2"
+><A
+NAME="AEN1435"
+>3.2.7. Iterators</A
+></H2
+><P
+>There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples for them in the
+"simple_transformation" directory.
+
+<PRE
+CLASS="PROGRAMLISTING"
+>val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit</PRE
+></P
+></DIV
+></DIV
+><DIV
+CLASS="NAVFOOTER"
+><HR
+ALIGN="LEFT"
+WIDTH="100%"><TABLE
+WIDTH="100%"
+BORDER="0"
+CELLPADDING="0"
+CELLSPACING="0"
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Prev</A
+></TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="index.html"
+>Home</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+><A
+HREF="x1439.html"
+>Next</A
+></TD
+></TR
+><TR
+><TD
+WIDTH="33%"
+ALIGN="left"
+VALIGN="top"
+>The objects representing the document</TD
+><TD
+WIDTH="34%"
+ALIGN="center"
+VALIGN="top"
+><A
+HREF="c893.html"
+>Up</A
+></TD
+><TD
+WIDTH="33%"
+ALIGN="right"
+VALIGN="top"
+>The class type <TT
+CLASS="LITERAL"
+>extension</TT
+></TD
+></TR
+></TABLE
+></DIV
+></BODY
+></HTML
+>
\ No newline at end of file
--- /dev/null
+%!PS-Adobe-2.0
+%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software
+%%Pages: 96
+%%PageOrder: Ascend
+%%BoundingBox: 0 0 596 842
+%%DocumentFonts: Helvetica-Bold Times-Roman Times-Bold Times-Italic
+%%+ Courier Courier-Oblique Helvetica-BoldOblique Courier-Bold
+%%DocumentPaperSizes: a4
+%%EndComments
+%DVIPSWebPage: (www.radicaleye.com)
+%DVIPSCommandLine: dvips -f
+%DVIPSParameters: dpi=600, compressed
+%DVIPSSource: TeX output 2000.08.30:1757
+%%BeginProcSet: texc.pro
+%!
+/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S
+N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72
+mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0
+0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{
+landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize
+mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[
+matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round
+exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{
+statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0]
+N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin
+/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array
+/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2
+array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N
+df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A
+definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get
+}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub}
+B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr
+1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3
+1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx
+0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx
+sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{
+rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp
+gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B
+/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{
+/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{
+A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy
+get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse}
+ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp
+fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17
+{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add
+chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{
+1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop}
+forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn
+/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put
+}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{
+bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A
+mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{
+SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{
+userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X
+1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4
+index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N
+/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{
+/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT)
+(LaserWriter 16/600)]{A length product length le{A length product exch 0
+exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse
+end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask
+grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot}
+imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round
+exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto
+fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p
+delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M}
+B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{
+p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S
+rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end
+
+%%EndProcSet
+%%BeginProcSet: 8r.enc
+% @@psencodingfile@{
+% author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry",
+% version = "0.6",
+% date = "1 July 1998",
+% filename = "8r.enc",
+% email = "tex-fonts@@tug.org",
+% docstring = "Encoding for TrueType or Type 1 fonts
+% to be used with TeX."
+% @}
+%
+% Idea is to have all the characters normally included in Type 1 fonts
+% available for typesetting. This is effectively the characters in Adobe
+% Standard Encoding + ISO Latin 1 + extra characters from Lucida.
+%
+% Character code assignments were made as follows:
+%
+% (1) the Windows ANSI characters are almost all in their Windows ANSI
+% positions, because some Windows users cannot easily reencode the
+% fonts, and it makes no difference on other systems. The only Windows
+% ANSI characters not available are those that make no sense for
+% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen
+% (173). quotesingle and grave are moved just because it's such an
+% irritation not having them in TeX positions.
+%
+% (2) Remaining characters are assigned arbitrarily to the lower part
+% of the range, avoiding 0, 10 and 13 in case we meet dumb software.
+%
+% (3) Y&Y Lucida Bright includes some extra text characters; in the
+% hopes that other PostScript fonts, perhaps created for public
+% consumption, will include them, they are included starting at 0x12.
+%
+% (4) Remaining positions left undefined are for use in (hopefully)
+% upward-compatible revisions, if someday more characters are generally
+% available.
+%
+% (5) hyphen appears twice for compatibility with both
+% ASCII and Windows.
+%
+/TeXBase1Encoding [
+% 0x00 (encoded characters from Adobe Standard not in Windows 3.1)
+ /.notdef /dotaccent /fi /fl
+ /fraction /hungarumlaut /Lslash /lslash
+ /ogonek /ring /.notdef
+ /breve /minus /.notdef
+% These are the only two remaining unencoded characters, so may as
+% well include them.
+ /Zcaron /zcaron
+% 0x10
+ /caron /dotlessi
+% (unusual TeX characters available in, e.g., Lucida Bright)
+ /dotlessj /ff /ffi /ffl
+ /.notdef /.notdef /.notdef /.notdef
+ /.notdef /.notdef /.notdef /.notdef
+ % very contentious; it's so painful not having quoteleft and quoteright
+ % at 96 and 145 that we move the things normally found there to here.
+ /grave /quotesingle
+% 0x20 (ASCII begins)
+ /space /exclam /quotedbl /numbersign
+ /dollar /percent /ampersand /quoteright
+ /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash
+% 0x30
+ /zero /one /two /three /four /five /six /seven
+ /eight /nine /colon /semicolon /less /equal /greater /question
+% 0x40
+ /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O
+% 0x50
+ /P /Q /R /S /T /U /V /W
+ /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore
+% 0x60
+ /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o
+% 0x70
+ /p /q /r /s /t /u /v /w
+ /x /y /z /braceleft /bar /braceright /asciitilde
+ /.notdef % rubout; ASCII ends
+% 0x80
+ /.notdef /.notdef /quotesinglbase /florin
+ /quotedblbase /ellipsis /dagger /daggerdbl
+ /circumflex /perthousand /Scaron /guilsinglleft
+ /OE /.notdef /.notdef /.notdef
+% 0x90
+ /.notdef /.notdef /.notdef /quotedblleft
+ /quotedblright /bullet /endash /emdash
+ /tilde /trademark /scaron /guilsinglright
+ /oe /.notdef /.notdef /Ydieresis
+% 0xA0
+ /.notdef % nobreakspace
+ /exclamdown /cent /sterling
+ /currency /yen /brokenbar /section
+ /dieresis /copyright /ordfeminine /guillemotleft
+ /logicalnot
+ /hyphen % Y&Y (also at 45); Windows' softhyphen
+ /registered
+ /macron
+% 0xB0
+ /degree /plusminus /twosuperior /threesuperior
+ /acute /mu /paragraph /periodcentered
+ /cedilla /onesuperior /ordmasculine /guillemotright
+ /onequarter /onehalf /threequarters /questiondown
+% 0xC0
+ /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla
+ /Egrave /Eacute /Ecircumflex /Edieresis
+ /Igrave /Iacute /Icircumflex /Idieresis
+% 0xD0
+ /Eth /Ntilde /Ograve /Oacute
+ /Ocircumflex /Otilde /Odieresis /multiply
+ /Oslash /Ugrave /Uacute /Ucircumflex
+ /Udieresis /Yacute /Thorn /germandbls
+% 0xE0
+ /agrave /aacute /acircumflex /atilde
+ /adieresis /aring /ae /ccedilla
+ /egrave /eacute /ecircumflex /edieresis
+ /igrave /iacute /icircumflex /idieresis
+% 0xF0
+ /eth /ntilde /ograve /oacute
+ /ocircumflex /otilde /odieresis /divide
+ /oslash /ugrave /uacute /ucircumflex
+ /udieresis /yacute /thorn /ydieresis
+] def
+
+%%EndProcSet
+%%BeginProcSet: texps.pro
+%!
+TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2
+index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll
+exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics
+exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub
+dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def}
+ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict
+end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{
+dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1
+roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def
+dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def}
+if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def}
+def end
+
+%%EndProcSet
+%%BeginProcSet: special.pro
+%!
+TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N
+/vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N
+/rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N
+/@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{
+/hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho
+X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B
+/@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{
+/urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known
+{userdict/md get type/dicttype eq{userdict begin md length 10 add md
+maxlength ge{/md md dup length 20 add dict copy def}if end md begin
+/letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S
+atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{
+itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll
+transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll
+curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf
+pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}
+if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1
+-1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3
+get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip
+yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub
+neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{
+noflips{TR pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop
+90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get
+neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr
+1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr
+2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4
+-1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S
+TR}if}N/cp{pop pop showpage pm restore}N end}if}if}N/normalscale{
+Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale
+}if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState
+save N userdict maxlength dict begin/magscale true def normalscale
+currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts
+/psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x
+psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx
+psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub
+TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{
+psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2
+roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath
+moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict
+begin/SpecialSave save N gsave normalscale currentpoint TR
+@SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{
+CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto
+closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx
+sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR
+}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse
+CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury
+lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N
+/@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end}
+repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N
+/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX
+currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY
+moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X
+/yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0
+1 startangle endangle arc savematrix setmatrix}N end
+
+%%EndProcSet
+%%BeginProcSet: color.pro
+%!
+TeXDict begin/setcmykcolor where{pop}{/setcmykcolor{dup 10 eq{pop
+setrgbcolor}{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll
+}repeat setrgbcolor pop}ifelse}B}ifelse/TeXcolorcmyk{setcmykcolor}def
+/TeXcolorrgb{setrgbcolor}def/TeXcolorgrey{setgray}def/TeXcolorgray{
+setgray}def/TeXcolorhsb{sethsbcolor}def/currentcmykcolor where{pop}{
+/currentcmykcolor{currentrgbcolor 10}B}ifelse/DC{exch dup userdict exch
+known{pop pop}{X}ifelse}B/GreenYellow{0.15 0 0.69 0 setcmykcolor}DC
+/Yellow{0 0 1 0 setcmykcolor}DC/Goldenrod{0 0.10 0.84 0 setcmykcolor}DC
+/Dandelion{0 0.29 0.84 0 setcmykcolor}DC/Apricot{0 0.32 0.52 0
+setcmykcolor}DC/Peach{0 0.50 0.70 0 setcmykcolor}DC/Melon{0 0.46 0.50 0
+setcmykcolor}DC/YellowOrange{0 0.42 1 0 setcmykcolor}DC/Orange{0 0.61
+0.87 0 setcmykcolor}DC/BurntOrange{0 0.51 1 0 setcmykcolor}DC
+/Bittersweet{0 0.75 1 0.24 setcmykcolor}DC/RedOrange{0 0.77 0.87 0
+setcmykcolor}DC/Mahogany{0 0.85 0.87 0.35 setcmykcolor}DC/Maroon{0 0.87
+0.68 0.32 setcmykcolor}DC/BrickRed{0 0.89 0.94 0.28 setcmykcolor}DC/Red{
+0 1 1 0 setcmykcolor}DC/OrangeRed{0 1 0.50 0 setcmykcolor}DC/RubineRed{
+0 1 0.13 0 setcmykcolor}DC/WildStrawberry{0 0.96 0.39 0 setcmykcolor}DC
+/Salmon{0 0.53 0.38 0 setcmykcolor}DC/CarnationPink{0 0.63 0 0
+setcmykcolor}DC/Magenta{0 1 0 0 setcmykcolor}DC/VioletRed{0 0.81 0 0
+setcmykcolor}DC/Rhodamine{0 0.82 0 0 setcmykcolor}DC/Mulberry{0.34 0.90
+0 0.02 setcmykcolor}DC/RedViolet{0.07 0.90 0 0.34 setcmykcolor}DC
+/Fuchsia{0.47 0.91 0 0.08 setcmykcolor}DC/Lavender{0 0.48 0 0
+setcmykcolor}DC/Thistle{0.12 0.59 0 0 setcmykcolor}DC/Orchid{0.32 0.64 0
+0 setcmykcolor}DC/DarkOrchid{0.40 0.80 0.20 0 setcmykcolor}DC/Purple{
+0.45 0.86 0 0 setcmykcolor}DC/Plum{0.50 1 0 0 setcmykcolor}DC/Violet{
+0.79 0.88 0 0 setcmykcolor}DC/RoyalPurple{0.75 0.90 0 0 setcmykcolor}DC
+/BlueViolet{0.86 0.91 0 0.04 setcmykcolor}DC/Periwinkle{0.57 0.55 0 0
+setcmykcolor}DC/CadetBlue{0.62 0.57 0.23 0 setcmykcolor}DC
+/CornflowerBlue{0.65 0.13 0 0 setcmykcolor}DC/MidnightBlue{0.98 0.13 0
+0.43 setcmykcolor}DC/NavyBlue{0.94 0.54 0 0 setcmykcolor}DC/RoyalBlue{1
+0.50 0 0 setcmykcolor}DC/Blue{1 1 0 0 setcmykcolor}DC/Cerulean{0.94 0.11
+0 0 setcmykcolor}DC/Cyan{1 0 0 0 setcmykcolor}DC/ProcessBlue{0.96 0 0 0
+setcmykcolor}DC/SkyBlue{0.62 0 0.12 0 setcmykcolor}DC/Turquoise{0.85 0
+0.20 0 setcmykcolor}DC/TealBlue{0.86 0 0.34 0.02 setcmykcolor}DC
+/Aquamarine{0.82 0 0.30 0 setcmykcolor}DC/BlueGreen{0.85 0 0.33 0
+setcmykcolor}DC/Emerald{1 0 0.50 0 setcmykcolor}DC/JungleGreen{0.99 0
+0.52 0 setcmykcolor}DC/SeaGreen{0.69 0 0.50 0 setcmykcolor}DC/Green{1 0
+1 0 setcmykcolor}DC/ForestGreen{0.91 0 0.88 0.12 setcmykcolor}DC
+/PineGreen{0.92 0 0.59 0.25 setcmykcolor}DC/LimeGreen{0.50 0 1 0
+setcmykcolor}DC/YellowGreen{0.44 0 0.74 0 setcmykcolor}DC/SpringGreen{
+0.26 0 0.76 0 setcmykcolor}DC/OliveGreen{0.64 0 0.95 0.40 setcmykcolor}
+DC/RawSienna{0 0.72 1 0.45 setcmykcolor}DC/Sepia{0 0.83 1 0.70
+setcmykcolor}DC/Brown{0 0.81 1 0.60 setcmykcolor}DC/Tan{0.14 0.42 0.56 0
+setcmykcolor}DC/Gray{0 0 0 0.50 setcmykcolor}DC/Black{0 0 0 1
+setcmykcolor}DC/White{0 0 0 0 setcmykcolor}DC end
+
+%%EndProcSet
+TeXDict begin 39158280 55380996 1000 600 600 () @start
+/Fa 106[21 149[{TeXBase1Encoding ReEncodeFont}1 59.7758
+/Times-Roman rf /Fb 135[77 2[77 77 77 3[77 77 77 3[77
+3[77 77 77 99[{TeXBase1Encoding ReEncodeFont}11 129.116
+/Courier-Bold rf /Fc 134[65 65 2[65 65 65 65 1[65 65
+65 65 65 2[65 65 65 65 65 65 65 65 65 1[65 36[65 6[65
+65 65 49[{TeXBase1Encoding ReEncodeFont}25 107.597 /Courier-Bold
+rf /Fd 141[56 4[128 7[80 88 2[80 97[{TeXBase1Encoding ReEncodeFont}5
+143.462 /Helvetica-BoldOblique rf /Fe 147[21 4[37 1[33
+3[37 23[25 14[25 58[{TeXBase1Encoding ReEncodeFont}6
+74.7198 /Times-Italic rf /Ff 204[25 25 25 49[{
+TeXBase1Encoding ReEncodeFont}3 49.8132 /Times-Roman
+rf
+%DVIPSBitmapFont: Fg cmmi8 8 2
+/Fg 2 63 df<EE01C01607161FEE7F00ED01FCED07F0ED1FC0037FC7FCEC01FCEC07F0EC
+0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FCEA03F8EA0FE0EA3F8000FECAFC12F8
+12FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0EC07F0EC01
+FCEC007FED1FC0ED07F0ED01FCED007FEE1FC0160716012A2B7AA537>60
+D<12E012F812FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0
+EC07F0EC01FCEC007FED1FC0ED07F0ED01FCED007FEE1FC01607161FEE7F00ED01FCED07
+F0ED1FC0037FC7FCEC01FCEC07F0EC0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FC
+EA03F8EA0FE0EA3F8000FECAFC12F812E02A2B7AA537>62 D E
+%EndDVIPSBitmapFont
+/Fh 131[40 1[40 40 40 40 40 40 40 40 40 40 40 40 40 40
+40 40 1[40 40 40 1[40 40 40 40 40 1[40 5[40 3[40 40 40
+40 40 40 40 40 40 40 40 1[40 40 40 1[40 40 40 40 40 1[40
+40 40 40 40 40 1[40 4[40 1[40 1[40 40 40 40 40 40 40
+40 40 40 40 1[40 40 40 33[{TeXBase1Encoding ReEncodeFont}69
+67.2479 /Courier rf /Fi 105[37 28[37 37 54 37 37 21 29
+25 37 37 37 37 58 21 37 1[21 37 37 25 33 37 33 37 33
+7[54 54 3[46 5[54 66 46 2[25 2[42 2[50 50 54 5[21 21
+11[19 1[19 2[25 25 25 4[30 31[42 2[{TeXBase1Encoding ReEncodeFont}45
+74.7198 /Times-Roman rf /Fj 135[55 7[61 2[89 28 6[55
+3[55 27[66 69[{TeXBase1Encoding ReEncodeFont}7 99.6264
+/Helvetica-Bold rf /Fk 145[27 2[27 57[27 49[{
+TeXBase1Encoding ReEncodeFont}3 44.8318 /Courier-Oblique
+rf /Fl 135[50 3[50 50 3[50 50 3[50 50 3[50 1[50 50 2[50
+95[{TeXBase1Encoding ReEncodeFont}11 83.022 /Courier-Oblique
+rf
+%DVIPSBitmapFont: Fm cmmi10 10 2
+/Fm 2 63 df<EF0380EF0FC0173FEFFF80933803FE00EE0FF8EE3FE0EEFF80DB03FEC7FC
+ED0FF8ED3FE0EDFF80DA03FEC8FCEC0FF8EC3FE0ECFF80D903FEC9FCEB0FF8EB3FE0EBFF
+80D803FECAFCEA0FF8EA3FE0EA7F8000FECBFCA2EA7F80EA3FE0EA0FF8EA03FEC66C7EEB
+3FE0EB0FF8EB03FE903800FF80EC3FE0EC0FF8EC03FE913800FF80ED3FE0ED0FF8ED03FE
+923800FF80EE3FE0EE0FF8EE03FE933800FF80EF3FC0170FEF0380323279AD41>60
+D<126012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FF9338007F80EF1FC0A2EF7F80933801FF00EE07FCEE1FF0EE7FC04B48C7FCED07FCED
+1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07FCEB1FF0EB7FC04848CAFCEA
+07FCEA3FF0EA7FC048CBFC12FC1270323279AD41>62 D E
+%EndDVIPSBitmapFont
+/Fn 134[45 45 1[45 45 45 45 45 1[45 45 45 45 45 1[45
+45 45 45 45 45 45 45 45 45 1[45 5[45 2[45 8[45 5[45 2[45
+45 1[45 19[45 45 44[{TeXBase1Encoding ReEncodeFont}32
+74.7198 /Courier-Oblique rf
+%DVIPSBitmapFont: Fo cmmi9 9 2
+/Fo 2 63 df<171C177EEE01FEEE07FCEE1FF0EE7FC0923801FF00ED07FCED1FF0ED7FC0
+4A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07FCEA1FF0
+EA7FC048CAFCA2EA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE
+01FEEE007E171C2F2E7AA93C>60 D<127012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007F
+C0EB1FF0EB07FCEB01FF9038007FC0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED
+01FF9238007FC0EE1FF0EE07FCEE01FEA2EE07FCEE1FF0EE7FC0923801FF00ED07FCED1F
+F0ED7FC04A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07
+FCEA1FF0EA7FC048CAFC12FC12702F2E7AA93C>62 D E
+%EndDVIPSBitmapFont
+/Fp 134[66 66 93 66 73 40 66 47 1[73 73 73 106 33 2[33
+73 73 40 66 73 66 73 66 8[80 113 80 86 73 80 86 1[80
+1[86 100 73 2[33 86 1[73 80 86 86 1[86 1[73 5[66 66 66
+66 66 66 66 66 66 66 1[33 40 33 2[40 40 5[57 31[73 2[{
+TeXBase1Encoding ReEncodeFont}58 119.552 /Helvetica-Bold
+rf /Fq 129[45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45
+45 45 45 45 1[45 45 45 33[{TeXBase1Encoding ReEncodeFont}90
+74.7198 /Courier rf /Fr 134[37 37 55 37 42 23 32 32 1[42
+42 42 60 23 37 23 23 42 42 23 37 42 37 42 42 1[42 6[51
+69 1[60 46 42 2[51 1[55 69 46 2[28 3[51 60 55 1[51 1[42
+4[28 42 42 42 42 42 42 42 42 42 42 1[21 28 21 2[28 28
+6[28 30[42 2[{TeXBase1Encoding ReEncodeFont}58 83.022
+/Times-Italic rf /Fs 138[105 57 96 67 1[105 105 105 153
+48 1[48 48 105 105 57 96 105 96 105 96 8[115 163 1[124
+105 3[115 2[143 105 5[105 2[124 3[105 10[96 96 96 96
+2[48 43[105 2[{TeXBase1Encoding ReEncodeFont}35 172.154
+/Helvetica-Bold rf /Ft 106[23 29 29 25[33 33 48 33 33
+18 26 22 1[33 33 33 52 18 33 18 18 33 33 22 29 33 29
+33 29 8[48 3[41 37 2[37 6[22 1[48 12[18 10[18 17 1[17
+2[22 22 5[27 31[37 2[{TeXBase1Encoding ReEncodeFont}41
+66.4176 /Times-Roman rf /Fu 134[42 42 60 42 46 28 32
+37 1[46 42 46 69 23 46 1[23 46 42 28 37 46 37 46 42 9[83
+60 60 55 46 60 3[60 78 55 2[32 65 65 51 55 60 60 55 60
+1[42 6[42 1[42 42 42 42 42 42 2[21 28 21 4[28 39[{
+TeXBase1Encoding ReEncodeFont}53 83.022 /Times-Bold rf
+/Fv 27[37 58[63 42[45 40 1[40 37 42 42 60 42 42 23 32
+28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37 42 37
+28 42 1[28 23 28 1[60 60 78 60 60 51 46 55 60 46 60 60
+74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 47 47 47 23
+23 42 42 42 42 42 42 42 42 42 42 23 21 28 21 2[28 28
+28 65 69 1[42 34 28 29[46 46 2[{TeXBase1Encoding ReEncodeFont}90
+83.022 /Times-Roman rf /Fw 136[65 1[51 1[46 32 2[51 51
+1[23 2[23 51 51 1[46 51 2[46 8[55 3[51 3[55 11[60 9[28
+18[23 39[{TeXBase1Encoding ReEncodeFont}19 83.022 /Helvetica-Bold
+rf /Fx 134[80 80 112 80 88 48 80 56 1[88 88 88 128 40
+80 1[40 88 88 48 80 88 80 88 80 8[96 1[96 104 88 96 104
+2[112 104 120 88 2[40 104 112 1[96 104 104 1[104 6[48
+4[80 80 80 80 80 2[40 48 45[{TeXBase1Encoding ReEncodeFont}48
+143.462 /Helvetica-Bold rf /Fy 138[126 1[115 80 8[57
+126 126 1[115 126 11[138 2[149 126 3[138 6[57 26[57 6[57
+39[{TeXBase1Encoding ReEncodeFont}15 206.584 /Helvetica-Bold
+rf end
+%%EndProlog
+%%BeginSetup
+%%Feature: *Resolution 600dpi
+TeXDict begin
+%%BeginPaperSize: a4
+a4
+%%EndPaperSize
+
+%%EndSetup
+%%Page: 1 1
+1 0 bop Black Black 890 647 a Fy(The)58 b(PXP)f(user')-12
+b(s)58 b(guide)1384 2594 y Fx(Ger)m(d)39 b(Stolpmann)p
+Black Black eop
+%%Page: 2 2
+2 1 bop Black Black -2 579 a Fw(The)22 b(PXP)j(user')-5
+b(s)23 b(guide)-2 687 y Fv(by)d(Gerd)f(Stolpmann)-2 903
+y(Cop)o(yright)f(\251)j(1999,)e(2000)g(by)g(Gerd)h(Stolpmann)-2
+1135 y(PXP)h(is)g(a)g(v)n(alidating)d(parser)i(for)f(XML-1.0)g(which)h
+(has)g(been)g(written)g(entirely)f(in)h(Objecti)n(v)o(e)g(Caml.)-2
+1285 y Fw(Do)o(wnload)h(PXP:)j Fv(The)c(free)g(PXP)h(library)e(can)h
+(be)g(do)n(wnloaded)d(at)k(http://www)-5 b(.ocaml-programming)o(.de)o
+(/pack)o(age)o(s/.)15 b(This)-2 1393 y(user')-5 b(s)20
+b(guide)f(is)j(included.)c(Ne)n(west)j(releases)f(of)g(PXP)h(will)g(be)
+f(announced)e(in)i(The)g(OCaml)g(Link)g(Database)-2 1500
+y(\(http://www)-5 b(.npc.de/ocaml/linkdb)o(/\).)-2 1899
+y Fu(License)-2 2090 y Ft(This)16 b(document,)j(and)e(the)h(described)h
+(softw)o(are,)f("PXP",)e(are)i(cop)o(yright)i(by)d(Gerd)g(Stolpmann.)-2
+2198 y(Permission)h(is)e(hereby)j(granted,)f(free)g(of)f(char)o(ge,)h
+(to)f(an)o(y)h(person)f(obtaining)j(a)d(cop)o(y)h(of)f(this)h(document)
+g(and)g(the)f("PXP")g(softw)o(are)i(\(the)f("Softw)o(are"\),)g(to)f
+(deal)i(in)-2 2306 y(the)f(Softw)o(are)g(without)h(restriction,)g
+(including)h(without)e(limitation)i(the)e(rights)g(to)f(use,)g(cop)o(y)
+l(,)g(modify)l(,)g(mer)o(ge,)g(publish,)h(distrib)o(ute,)h(sublicense,)
+g(and/or)f(sell)-2 2414 y(copies)g(of)f(the)h(Softw)o(are,)g(and)g(to)f
+(permit)h(persons)f(to)h(whom)e(the)i(Softw)o(are)h(is)e(furnished)h
+(to)f(do)g(so,)g(subject)h(to)g(the)f(follo)n(wing)j(conditions:)-2
+2522 y(The)d(abo)o(v)o(e)h(cop)o(yright)h(notice)g(and)f(this)f
+(permission)h(notice)h(shall)f(be)g(included)h(in)e(all)h(copies)h(or)e
+(substantial)i(portions)g(of)e(the)g(Softw)o(are.)-2
+2630 y(The)g(Softw)o(are)h(is)f(pro)o(vided)i(\223as)e(is\224,)g
+(without)i(w)o(arranty)g(of)e(an)o(y)g(kind,)h(e)o(xpress)f(or)g
+(implied,)i(including)g(b)o(ut)e(not)h(limited)h(to)e(the)h(w)o
+(arranties)h(of)e(merchantability)l(,)-2 2737 y(\002tness)g(for)g(a)g
+(particular)j(purpose)e(and)g(noninfringement.)i(In)d(no)g(e)n(v)o(ent)
+h(shall)h(Gerd)e(Stolpmann)h(be)g(liable)h(for)e(an)o(y)g(claim,)h
+(damages)g(or)f(other)h(liability)l(,)i(whether)-2 2845
+y(in)d(an)g(action)i(of)e(contract,)i(tort)f(or)f(otherwise,)i(arising)
+f(from,)e(out)i(of)f(or)g(in)g(connection)j(with)e(the)f(Softw)o(are)i
+(or)e(the)h(use)f(or)g(other)h(dealings)h(in)e(the)h(softw)o(are.)p
+Black Black eop
+%%Page: 3 3
+3 2 bop Black Black -2 621 a Fs(T)-14 b(ab)n(le)48 b(of)g(Contents)396
+815 y Fu(I.)21 b(User')m(s)g(guide)p Black 4 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black 4 w(6)596
+943 y Fv(1.)f(What)g(is)h(XML?)p Black 4 w(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)795 1051
+y(1.1.)e(Introduction)p Black 14 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black 4 w(7)994 1159
+y(1.1.1.)g(The)g("hello)h(w)o(orld")g(e)o(xample)p Black
+13 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black 4 w(7)994 1267 y(1.1.2.)f(XML)h(parsers)g(and)f
+(processors)p Black 3 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black 4 w(9)994 1375 y(1.1.3.)g(Discussion)p
+Black 9 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+4 w(9)795 1483 y(1.2.)g(Highlights)g(of)h(XML)p Black
+10 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(11)994
+1591 y(1.2.1.)f(The)g(DTD)i(and)e(the)i(instance)p Black
+15 w(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(11)994 1699 y(1.2.2.)e(Reserv)o(ed)g(characters)p
+Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(12)994 1807 y(1.2.3.)g(Elements)g(and)h
+(ELEMENT)f(declarations)p Black 7 w(.)p Black Black -2
+w(.)p Black Black(.)p Black Black(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(13)994
+1915 y(1.2.4.)g(Attrib)n(ute)g(lists)j(and)e(A)-9 b(TTLIST)19
+b(declarations)p Black 6 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(15)994 2023 y(1.2.5.)g(P)o(arsed)g(entities)p
+Black 18 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(16)994 2131 y(1.2.6.)g(Notations)g(and)h
+(unparsed)e(entities)p Black 14 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(19)795 2238 y(1.3.)h(A)i(complete)e(e)o(xample:)g
+(The)h Fr(r)m(eadme)f Fv(DTD)p Black 3 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(20)596 2346 y(2.)h(Using)g(PXP)p Black
+6 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(24)795 2454 y(2.1.)f(V)-9 b(alidation)p
+Black 3 w(.)p Black Black -2 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795
+2562 y(2.2.)19 b(Ho)n(w)h(to)g(parse)g(a)h(document)d(from)h(an)h
+(application)p Black 10 w(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795
+2670 y(2.3.)f(Class-based)h(processing)f(of)h(the)g(node)g(tree)p
+Black 8 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(29)795
+2778 y(2.4.)f(Example:)g(An)h(HTML)g(back)o(end)f(for)g(the)i
+Fr(r)m(eadme)e Fv(DTD)p Black 3 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(33)994 2886 y(2.4.1.)g(Header)p
+Black 9 w(.)p Black Black -2 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(33)994 2994 y(2.4.2.)g(T)-7 b(ype)19
+b(declarations)p Black 14 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(33)994 3102 y(2.4.3.)g(Class)i Fq(store)p Black
+11 w Fv(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(34)994 3210 y(2.4.4.)e(Function)g
+Fq(escape_html)p Black Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(35)994 3318 y(2.4.5.)g(V)-5 b(irtual)20 b(class)h
+Fq(shared)p Black 4 w Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(35)994 3426 y(2.4.6.)e(Class)i
+Fq(only_data)p Black 17 w Fv(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(36)994
+3534 y(2.4.7.)e(Class)i Fq(readme)p Black 8 w Fv(.)p
+Black Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(36)994 3642 y(2.4.8.)e(Classes)i
+Fq(section)p Fv(,)f Fq(sect1)p Fv(,)f Fq(sect2)p Fv(,)h(and)g
+Fq(sect3)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(39)994 3749 y(2.4.9.)f(Classes)i
+Fq(map_tag)p Fv(,)f Fq(p)p Fv(,)g Fq(em)p Fv(,)g Fq(ul)p
+Fv(,)g Fq(li)p Black 16 w Fv(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(39)994
+3857 y(2.4.10.)e(Class)k Fq(br)p Black Fv(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(40)994 3965 y(2.4.11.)c(Class)k
+Fq(code)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(40)994 4073 y(2.4.12.)c(Class)k
+Fq(a)p Black 4 w Fv(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(41)994
+4181 y(2.4.13.)c(Class)k Fq(footnote)p Black 1 w Fv(.)p
+Black Black -2 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(42)994 4289
+y(2.4.14.)c(The)i(speci\002cation)f(of)h(the)g(document)f(model)p
+Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(43)596 4397 y(3.)h(The)f(objects)h
+(representing)e(the)j(document)p Black 4 w(.)p Black
+Black -3 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(46)795 4505 y(3.1.)e(The)h Fq(document)f Fv(class)p
+Black 7 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(46)795 4613 y(3.2.)g(The)h(class)h(type)f
+Fq(node)p Black 2 w Fv(.)p Black Black -2 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(47)994 4721 y(3.2.1.)f(The)g(structure)h(of)g(document)e(trees)p
+Black 3 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(49)994
+4829 y(3.2.2.)h(The)g(methods)h(of)f(the)i(class)g(type)f
+Fq(node)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(52)p Black 3842
+5278 a Fr(3)p Black eop
+%%Page: 4 4
+4 3 bop Black Black 994 579 a Fv(3.2.3.)19 b(The)g(class)j
+Fq(element_impl)p Black 2 w Fv(.)p Black Black -3 w(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(56)994 687 y(3.2.4.)d(The)g(class)j Fq(data_impl)p
+Black 12 w Fv(.)p Black Black -2 w(.)p Black Black -1
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(57)994
+795 y(3.2.5.)d(The)g(type)h Fq(spec)p Black 5 w Fv(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(58)994 903 y(3.2.6.)f(Examples)p Black
+5 w(.)p Black Black -3 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(60)994
+1011 y(3.2.7.)g(Iterators)p Black 12 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(64)795 1119 y(3.3.)g(The)h(class)h(type)f Fq(extension)p
+Black 6 w Fv(.)p Black Black -2 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(65)994 1226 y(3.3.1.)f(Ho)n(w)h(to)g(de\002ne)
+g(an)g(e)o(xtension)f(class)p Black 13 w(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(66)994 1334
+y(3.3.2.)g(Ho)n(w)h(to)g(bind)f(e)o(xtension)g(classes)i(to)g(element)e
+(types)p Black 10 w(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(68)795 1442 y(3.4.)g(Details)i(of)f(the)g(mapping)e(from)i(XML)g
+(te)o(xt)g(to)g(the)g(tree)h(representation)p Black 13
+w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(69)994
+1550 y(3.4.1.)e(The)g(representation)g(of)g(character)n(-free)f
+(elements)p Black 9 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(69)994 1658 y(3.4.2.)h(The)g(representation)g(of)g(character)g
+(data)p Black 10 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(70)994 1766
+y(3.4.3.)g(The)g(representation)g(of)g(entities)i(within)f(documents)p
+Black 12 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(70)994 1874 y(3.4.4.)f(The)g(representation)g
+(of)g(attrib)n(utes)p Black 20 w(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(71)994 1982 y(3.4.5.)g(The)g(representation)g(of)g
+(processing)g(instructions)p Black(.)p Black Black -1
+w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(71)994 2090 y(3.4.6.)g(The)g
+(representation)g(of)g(comments)p Black 7 w(.)p Black
+Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(71)994 2198 y(3.4.7.)g(The)g(attrib)n(utes)i
+Fq(xml:lang)e Fv(and)h Fq(xml:space)p Black 10 w Fv(.)p
+Black Black -2 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(72)994 2306 y(3.4.8.)f(And)g(what)h(about)g(namespaces?)p
+Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(72)596
+2414 y(4.)g(Con\002guring)e(and)h(calling)h(the)g(parser)p
+Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(73)795 2522 y(4.1.)f(Ov)o(ervie)n(w)p
+Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(73)795
+2630 y(4.2.)g(Resolv)o(ers)h(and)g(sources)p Black 2
+w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(75)994 2737
+y(4.2.1.)f(Using)h(the)g(b)n(uilt-in)f(resolv)o(ers)h(\(called)f
+(sources\))p Black 5 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(75)994 2845 y(4.2.2.)g(The)g(resolv)o(er)g(API)p
+Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(76)994 2953 y(4.2.3.)g(Prede\002ned)f(resolv)o(er)h
+(components)p Black 13 w(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black 4 w(78)795 3061
+y(4.3.)g(The)h(DTD)g(classes)p Black 1 w(.)p Black Black
+1 w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(81)795
+3169 y(4.4.)f(In)m(v)n(oking)f(the)i(parser)p Black 14
+w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(89)994
+3277 y(4.4.1.)f(Def)o(aults)p Black 10 w(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(89)994 3385 y(4.4.2.)g(P)o(arsing)g(functions)p
+Black 4 w(.)p Black Black -3 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(90)994 3493 y(4.4.3.)g(Con\002guration)f(options)p
+Black 19 w(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(91)994 3601 y(4.4.4.)h(Which)h
+(con\002guration)d(should)i(I)i(use?)p Black 18 w(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(93)795 3709 y(4.5.)e(Updates)p Black 10 w(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(95)p Black 3842 5278 a
+Fr(4)p Black eop
+%%Page: 5 5
+5 4 bop Black Black -2 621 a Fs(List)48 b(of)g(Figures)396
+815 y Fv(3-1.)19 b(A)i(tree)f(with)h(element)e(nodes,)h(data)g(nodes,)f
+(and)g(attrib)n(utes)p Black 18 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black 4 w(49)396 923 y(3-2.)g(Nodes)h(are)g(doubly)f(link)o
+(ed)g(trees)p Black 15 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black 4 w(50)396
+1031 y(3-3.)g(A)i(node)e(can)h(only)g(be)g(added)f(if)h(it)h(is)g(a)g
+(root)p Black 5 w(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black 4 w(51)396 1139 y(3-4.)e(A)i(deleted)f(node)f
+(becomes)g(the)h(root)g(of)g(the)g(subtree)p Black 3
+w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+4 w(51)396 1247 y(3-5.)f(The)h(clone)g(of)g(a)g(subtree)p
+Black 18 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black 4 w(52)396 1355 y(3-6.)f(The)h(structure)g
+(of)f(nodes)h(and)g(e)o(xtensions)p Black 18 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black -1 w(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black Black(.)p Black Black(.)p Black Black(.)p Black
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black
+Black(.)p Black Black(.)p Black Black(.)p Black Black
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p
+Black 4 w(65)p Black 3842 5278 a Fr(5)p Black eop
+%%Page: 6 6
+6 5 bop Black Black 1241 647 a Fy(I.)58 b(User')-12 b(s)57
+b(guide)p Black Black eop
+%%Page: 7 7
+7 6 bop Black Black -2 621 a Fs(Chapter)48 b(1.)f(What)h(is)f(XML?)-2
+1055 y Fx(1.1.)39 b(Intr)m(oduction)396 1235 y Fv(XML)20
+b(\(short)g(for)f Fr(Extensible)h(Markup)g(Langua)o(g)o(e)p
+Fv(\))e(generalizes)h(the)h(idea)g(that)g(te)o(xt)g(documents)f(are)h
+(typically)396 1343 y(structured)f(in)h(sections,)g(sub-sections,)f
+(paragraphs,)f(and)i(so)g(on.)g(The)g(format)f(of)h(the)g(document)e
+(is)j(not)f(\002x)o(ed)g(\(as,)396 1451 y(for)g(e)o(xample,)e(in)j
+(HTML\),)e(b)n(ut)h(can)g(be)g(declared)f(by)h(a)h(so-called)e(DTD)i
+(\(document)c(type)j(de\002nition\).)f(The)g(DTD)396
+1559 y(describes)h(only)f(the)i(rules)f(ho)n(w)f(the)i(document)d(can)i
+(be)g(structured,)e(b)n(ut)j(not)e(ho)n(w)h(the)g(document)e(can)i(be)
+396 1667 y(processed.)f(F)o(or)h(e)o(xample,)e(if)j(you)e(w)o(ant)i(to)
+f(publish)f(a)i(book)e(that)h(uses)h(XML)f(markup,)e(you)h(will)i(need)
+f(a)g(processor)396 1775 y(that)h(con)m(v)o(erts)d(the)i(XML)g(\002le)h
+(into)f(a)h(printable)e(format)g(such)h(as)h(Postscript.)f(On)g(the)g
+(one)g(hand,)f(the)h(structure)f(of)396 1883 y(XML)h(documents)f(is)i
+(con\002gurable;)d(on)i(the)g(other)f(hand,)g(there)h(is)h(no)f(longer)
+f(a)h(canonical)f(interpretation)f(of)i(the)396 1991
+y(elements)g(of)g(the)g(document;)f(for)g(e)o(xample)g(one)h(XML)g(DTD)
+g(might)g(w)o(ant)g(that)g(paragraphes)e(are)i(delimited)g(by)396
+2099 y Fq(para)g Fv(tags,)h(and)e(another)g(DTD)h(e)o(xpects)g
+Fq(p)g Fv(tags)h(for)e(the)i(same)f(purpose.)e(As)j(a)g(result,)f(for)g
+(e)n(v)o(ery)e(DTD)j(a)f(ne)n(w)396 2206 y(processor)f(is)i(required.)
+396 2356 y(Although)e(XML)h(can)g(be)g(used)g(to)g(e)o(xpress)g
+(structured)f(te)o(xt)h(documents)e(it)j(is)g(not)f(limited)g(to)g
+(this)h(kind)e(of)396 2464 y(application.)g(F)o(or)h(e)o(xample,)e(XML)
+i(can)g(also)h(be)f(used)g(to)g(e)o(xchange)e(structured)h(data)h(o)o
+(v)o(er)f(a)h(netw)o(ork,)f(or)h(to)396 2572 y(simply)g(store)g
+(structured)f(data)h(in)g(\002les.)h(Note)f(that)h(XML)f(documents)e
+(cannot)i(contain)f(arbitrary)f(binary)h(data)396 2680
+y(because)g(some)g(characters)g(are)g(forbidden;)e(for)i(some)g
+(applications)g(you)f(need)h(to)h(encode)e(binary)g(data)h(as)h(te)o
+(xt)g(\(e.g.)396 2788 y(the)g(base)h(64)f(encoding\).)-2
+3116 y Fp(1.1.1.)35 b(The)f("hello)g(w)n(orld")e(e)n(xample)396
+3283 y Fv(The)20 b(follo)n(wing)f(e)o(xample)f(sho)n(ws)j(a)f(v)o(ery)f
+(simple)i(DTD,)f(and)f(a)i(corresponding)c(document)h(instance.)h(The)
+396 3391 y(document)f(is)k(structured)c(such)i(that)h(it)f(consists)h
+(of)f(sections,)g(and)g(that)g(sections)g(consist)h(of)f(paragraphs,)d
+(and)j(that)396 3499 y(paragraphs)e(contain)h(plain)h(te)o(xt:)396
+3679 y Fq(<!ELEMENT)44 b(document)f(\(section\)+>)396
+3777 y(<!ELEMENT)h(section)f(\(paragraph\)+>)396 3874
+y(<!ELEMENT)h(paragraph)f(\(#PCDATA\)>)396 4065 y Fv(The)20
+b(follo)n(wing)f(document)f(is)j(an)f(instance)g(of)g(this)h(DTD:)396
+4245 y Fq(<?xml)44 b(version="1.0")f(encoding="ISO-8859-1"?>)396
+4342 y(<!DOCTYPE)h(document)f(SYSTEM)h("simple.dtd">)396
+4439 y(<document>)486 4536 y(<section>)576 4633 y(<paragraph>This)e(is)
+i(a)h(paragraph)e(of)i(the)f(first)g(section.</paragraph>)576
+4731 y(<paragraph>This)e(is)i(another)g(paragraph)f(of)i(the)f(first)g
+(section.</paragraph>)486 4828 y(</section>)p Black 3839
+5278 a Fr(7)p Black eop
+%%Page: 8 8
+8 7 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 486 579 a Fq(<section>)576 676 y(<paragraph>This)42
+b(is)i(the)h(only)f(paragraph)f(of)i(the)f(second)g
+(section.</paragraph>)486 773 y(</section>)396 870 y(</document>)396
+1061 y Fv(As)21 b(in)g(HTML)f(\(and,)f(of)h(course,)f(in)h(grand-f)o
+(ather)d(SGML\),)j(the)g("pieces")g(of)g(the)g(document)f(are)h
+(delimited)f(by)396 1169 y(element)h(braces,)f(i.e.)i(such)f(a)g(piece)
+g(be)o(gins)f(with)i Fo(<)p Fq(name-of-the-type-of-the-piece)p
+Fo(>)15 b Fv(and)20 b(ends)g(with)396 1277 y Fo(<)p Fq
+(/name-of-the-type-of-the-piece)p Fo(>)p Fv(,)15 b(and)20
+b(the)g(pieces)g(are)g(called)g Fr(elements)p Fv(.)g(Unlik)o(e)g(HTML)g
+(and)396 1385 y(SGML,)g(both)g(start)g(tags)h(and)f(end)f(tags)i
+(\(i.e.)f(the)g(delimiters)g(written)g(in)g(angle)g(brack)o(ets\))f
+(can)h(ne)n(v)o(er)f(be)h(left)g(out.)396 1493 y(F)o(or)g(e)o(xample,)f
+(HTML)h(calls)h(the)f(paragraphs)e(simply)i Fq(p)p Fv(,)g(and)f
+(because)h(paragraphs)e(ne)n(v)o(er)h(contain)g(paragraphs,)f(a)396
+1601 y(sequence)h(of)h(se)n(v)o(eral)g(paragraphs)e(can)i(be)g(written)
+g(as:)396 1781 y Fq(<p>First)44 b(paragraph)396 1878
+y(<p>Second)g(paragraph)396 2069 y Fv(This)21 b(is)g(not)f(possible)g
+(in)g(XML;)g(continuing)e(our)i(e)o(xample)e(abo)o(v)o(e)h(we)h(must)h
+(al)o(w)o(ays)f(write)396 2249 y Fq(<paragraph>First)42
+b(paragraph</paragraph>)396 2346 y(<paragraph>Second)g
+(paragraph</paragraph>)396 2537 y Fv(The)20 b(rationale)f(behind)g
+(that)h(is)i(to)e(\(1\))f(simplify)h(the)g(de)n(v)o(elopment)d(of)j
+(XML)h(parsers)f(\(you)e(need)i(not)g(con)m(v)o(ert)e(the)396
+2645 y(DTD)j(into)f(a)g(deterministic)f(\002nite)i(automaton)d(which)i
+(is)h(required)d(to)j(detect)f(omitted)f(tags\),)h(and)g(to)g(\(2\))g
+(mak)o(e)f(it)396 2753 y(possible)h(to)h(parse)e(the)i(document)d
+(independent)f(of)j(whether)f(the)i(DTD)f(is)h(kno)n(wn)e(or)h(not.)396
+2903 y(The)g(\002rst)h(line)f(of)g(our)g(sample)g(document,)396
+3083 y Fq(<?xml)44 b(version="1.0")f(encoding="ISO-8859-1"?>)396
+3274 y Fv(is)21 b(the)e(so-called)g Fr(XML)h(declar)o(ation)p
+Fv(.)d(It)j(e)o(xpresses)e(that)i(the)f(document)f(follo)n(ws)h(the)g
+(con)m(v)o(entions)e(of)i(XML)g(v)o(ersion)396 3382 y(1.0,)h(and)f
+(that)h(the)h(document)d(is)j(encoded)d(using)i(characters)f(from)g
+(the)i(ISO-8859-1)c(character)i(set)i(\(often)e(kno)n(wn)396
+3490 y(as)i("Latin)e(1",)g(mostly)h(used)f(in)h(W)-7
+b(estern)20 b(Europe\).)d(Although)h(the)i(XML)g(declaration)e(is)i
+(not)g(mandatory)-5 b(,)16 b(it)21 b(is)f(good)396 3598
+y(style)h(to)f(include)f(it;)i(e)n(v)o(erybody)c(sees)k(at)g(the)f
+(\002rst)h(glance)f(that)g(the)g(document)e(uses)j(XML)f(markup)f(and)g
+(not)h(the)396 3706 y(similar)n(-looking)e(HTML)i(and)g(SGML)g(markup)f
+(languages.)f(If)i(you)g(omit)g(the)g(XML)g(declaration,)e(the)j
+(parser)e(will)396 3813 y(assume)h(that)h(the)f(document)e(is)j
+(encoded)e(as)i(UTF-8)e(or)h(UTF-16)f(\(there)h(is)h(a)g(rule)e(that)i
+(mak)o(es)f(it)h(possible)f(to)396 3921 y(distinguish)f(between)h
+(UTF-8)g(and)f(UTF-16)g(automatically\);)g(these)h(are)g(encodings)f
+(of)h(Unicode')-5 b(s)19 b(uni)n(v)o(ersal)396 4029 y(character)g(set.)
+i(\(Note)f(that)g(PXP,)h(unlik)o(e)e(its)i(predecessor)e("Markup",)f
+(fully)i(supports)f(Unicode.\))396 4179 y(The)h(second)f(line,)396
+4359 y Fq(<!DOCTYPE)44 b(document)f(SYSTEM)h("simple.dtd">)396
+4550 y Fv(names)20 b(the)g(DTD)h(that)f(is)h(going)e(to)h(be)g(used)g
+(for)g(the)g(rest)h(of)f(the)g(document.)e(In)i(general,)f(it)i(is)g
+(possible)f(that)g(the)396 4658 y(DTD)h(consists)f(of)g(tw)o(o)h
+(parts,)f(the)g(so-called)f(e)o(xternal)g(and)h(the)g(internal)f
+(subset.)h("External")f(means)h(that)g(the)h(DTD)396
+4766 y(e)o(xists)g(as)g(a)f(second)g(\002le;)h("internal")e(means)h
+(that)g(the)g(DTD)h(is)g(included)d(in)j(the)f(same)g(\002le.)h(In)f
+(this)g(e)o(xample,)f(there)p Black 3842 5278 a Fr(8)p
+Black eop
+%%Page: 9 9
+9 8 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(is)g(only)f(an)g(e)o(xternal)f(subset,)h(and)g(the)g
+(system)g(identi\002er)g("simple.dtd")e(speci\002es)j(where)f(the)g
+(DTD)g(\002le)h(can)f(be)396 687 y(found.)e(System)j(identi\002ers)f
+(are)g(interpreted)e(as)j(URLs;)g(for)f(instance)g(this)g(w)o(ould)g
+(be)g(le)o(gal:)396 867 y Fq(<!DOCTYPE)44 b(document)f(SYSTEM)h
+("http://host/location/simple.dtd">)396 1058 y Fv(Please)21
+b(note)f(that)g(PXP)h(cannot)e(interpret)g(HTTP)i(identi\002ers)e(by)h
+(def)o(ault,)f(b)n(ut)i(it)g(is)g(possible)f(to)g(change)f(the)396
+1166 y(interpretation)f(of)i(system)h(identi\002ers.)396
+1315 y(The)f(w)o(ord)g(immediately)f(follo)n(wing)f Fq(DOCTYPE)i
+Fv(determines)f(which)g(of)h(the)g(declared)f(element)h(types)g(\(here)
+396 1423 y("document",)e("section",)h(and)h("paragraph"\))d(is)k(used)f
+(for)g(the)g(outermost)f(element,)g(the)h Fr(r)l(oot)h(element)q
+Fv(.)f(In)g(this)396 1531 y(e)o(xample)f(it)i(is)g Fq(document)f
+Fv(because)f(the)h(outermost)f(element)h(is)h(delimited)e(by)h
+Fo(<)p Fq(document)p Fo(>)f Fv(and)396 1639 y Fo(<)p
+Fq(/document)p Fo(>)p Fv(.)396 1789 y(The)h(DTD)g(consists)h(of)f
+(three)g(declarations)f(for)g(element)h(types:)g Fq(document)p
+Fv(,)f Fq(section)p Fv(,)g(and)h Fq(paragraph)p Fv(.)f(Such)396
+1896 y(a)i(declaration)d(has)j(tw)o(o)f(parts:)396 2077
+y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(content-model)p Fo(>)396
+2268 y Fv(The)20 b(content)f(model)h(is)h(a)f(re)o(gular)f(e)o
+(xpression)g(which)g(describes)h(the)g(possible)g(inner)f(structure)h
+(of)g(the)g(element.)396 2376 y(Here,)g Fq(document)f
+Fv(contains)h(one)g(or)g(more)f(sections,)h(and)g(a)g
+Fq(section)g Fv(contains)f(one)h(or)g(more)f(paragraphs.)f(Note)396
+2483 y(that)j(these)f(tw)o(o)g(element)g(types)g(are)g(not)g(allo)n
+(wed)f(to)i(contain)e(arbitrary)g(te)o(xt.)g(Only)h(the)g
+Fq(paragraph)g Fv(element)f(type)396 2591 y(is)i(declared)e(such)h
+(that)h(parsed)e(character)g(data)h(\(indicated)f(by)h(the)g(symbol)f
+Fq(#PCDATA)p Fv(\))g(is)i(permitted.)396 2741 y(See)g(belo)n(w)e(for)h
+(a)h(detailed)e(discussion)h(of)g(content)f(models.)-2
+3110 y Fp(1.1.2.)35 b(XML)e(par)n(ser)n(s)h(and)g(pr)n(ocessor)n(s)396
+3278 y Fv(XML)20 b(documents)f(are)h(human-readable,)c(b)n(ut)21
+b(this)f(is)h(not)f(the)h(main)e(purpose)g(of)h(this)h(language.)d(XML)
+i(has)g(been)396 3386 y(designed)f(such)h(that)g(documents)f(can)h(be)g
+(read)g(by)f(a)i(program)d(called)i(an)g Fr(XML)h(par)o(ser)r
+Fv(.)f(The)g(parser)g(checks)f(that)396 3494 y(the)h(document)f(is)i
+(well-formatted,)d(and)h(it)i(represents)f(the)g(document)e(as)j
+(objects)f(of)g(the)g(programming)d(language.)396 3602
+y(There)j(are)g(tw)o(o)g(aspects)h(when)e(checking)g(the)h(document:)e
+(First,)j(the)f(document)e(must)j(follo)n(w)e(some)h(basic)396
+3710 y(syntactic)g(rules,)g(such)g(as)h(that)f(tags)h(are)f(written)g
+(in)g(angle)g(brack)o(ets,)f(that)h(for)g(e)n(v)o(ery)f(start)h(tag)h
+(there)e(must)i(be)f(a)396 3818 y(corresponding)d(end)j(tag)g(and)f(so)
+i(on.)f(A)g(document)e(respecting)h(these)i(rules)f(is)h
+Fr(well-formed)r Fv(.)f(Second,)f(the)396 3926 y(document)f(must)j
+(match)e(the)i(DTD)f(in)g(which)g(case)h(the)f(document)e(is)j
+Fr(valid)r Fv(.)f(Man)o(y)f(parsers)h(check)f(only)h(on)396
+4034 y(well-formedness)e(and)i(ignore)f(the)h(DTD;)h(PXP)g(is)g
+(designed)e(such)g(that)i(it)g(can)f(e)n(v)o(en)f(v)n(alidate)g(the)i
+(document.)396 4183 y(A)g(parser)f(does)f(not)h(mak)o(e)g(a)h(sensible)
+f(application,)e(it)j(only)f(reads)g(XML)g(documents.)e(The)i(whole)g
+(application)396 4291 y(w)o(orking)f(with)h(XML-formatted)e(data)i(is)h
+(called)f(an)g Fr(XML)h(pr)l(ocessor)r Fv(.)f(Often)g(XML)g(processors)
+f(con)m(v)o(ert)396 4399 y(documents)g(into)h(another)e(format,)h(such)
+h(as)h(HTML)f(or)g(Postscript.)g(Sometimes)g(processors)f(e)o(xtract)g
+(data)h(of)g(the)396 4507 y(documents)f(and)g(output)g(the)i(processed)
+e(data)h(again)f(XML-formatted.)e(The)j(parser)g(can)g(help)f(the)i
+(application)396 4615 y(processing)e(the)h(document;)f(for)g(e)o
+(xample)g(it)i(can)f(pro)o(vide)e(means)i(to)g(access)h(the)f(document)
+e(in)j(a)f(speci\002c)h(manner)-5 b(.)396 4723 y(PXP)21
+b(supports)e(an)i(object-oriented)c(access)k(layer)e(specially)-5
+b(.)p Black 3842 5278 a Fr(9)p Black eop
+%%Page: 10 10
+10 9 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 583 a Fp(1.1.3.)35 b(Discussion)396 751 y Fv(As)21
+b(we)g(ha)n(v)o(e)e(seen,)h(there)g(are)g(tw)o(o)h(le)n(v)o(els)f(of)g
+(description:)f(On)h(the)g(one)g(hand,)f(XML)h(can)g(de\002ne)f(rules)i
+(about)e(the)396 859 y(format)g(of)h(a)h(document)d(\(the)i(DTD\),)g
+(on)f(the)i(other)e(hand,)g(XML)h(e)o(xpresses)g(structured)f
+(documents.)f(There)h(are)h(a)396 967 y(number)f(of)h(possible)f
+(applications:)p Black 396 1199 a Ft(\225)p Black 60
+w Fv(XML)i(can)f(be)g(used)g(to)g(e)o(xpress)f(structured)g(te)o(xts.)h
+(Unlik)o(e)g(HTML,)g(there)g(is)h(no)e(canonical)g(interpretation;)g
+(one)479 1307 y(w)o(ould)h(ha)n(v)o(e)f(to)i(write)f(a)h(back)o(end)d
+(for)i(the)g(DTD)g(that)h(translates)f(the)g(structured)f(te)o(xts)h
+(into)g(a)h(format)e(that)479 1415 y(e)o(xisting)h(bro)n(wsers,)f
+(printers)g(etc.)i(understand.)c(The)j(adv)n(antage)e(of)i(a)h
+(self-de\002ned)e(document)f(format)h(is)i(that)f(it)479
+1523 y(is)h(possible)f(to)h(design)e(the)h(format)f(in)i(a)f(more)g
+(problem-oriented)c(w)o(ay)-5 b(.)20 b(F)o(or)f(e)o(xample,)g(if)h(the)
+h(task)f(is)h(to)g(e)o(xtract)479 1631 y(reports)f(from)f(a)h
+(database,)g(one)f(can)h(use)h(a)f(DTD)h(that)f(re\003ects)h(the)f
+(structure)f(of)h(the)g(report)f(or)h(the)g(database.)g(A)479
+1739 y(possible)g(approach)e(w)o(ould)i(be)g(to)g(ha)n(v)o(e)g(an)g
+(element)f(type)h(for)g(e)n(v)o(ery)f(database)g(table)h(and)g(for)g(e)
+n(v)o(ery)e(column.)479 1847 y(Once)i(the)g(DTD)h(has)f(been)g
+(designed,)e(the)j(report)e(procedure)e(can)j(be)g(splitted)h(up)e(in)i
+(a)f(part)g(that)h(selects)g(the)479 1955 y(database)f(ro)n(ws)g(and)g
+(outputs)f(them)h(as)h(an)f(XML)g(document)e(according)g(to)j(the)f
+(DTD,)g(and)g(in)g(a)g(part)g(that)479 2063 y(translates)h(the)f
+(document)e(into)i(other)f(formats.)g(Of)i(course,)e(the)h(latter)h
+(part)e(can)h(be)h(solv)o(ed)e(in)h(a)h(generic)e(w)o(ay)-5
+b(,)479 2170 y(e.g.)20 b(there)g(may)f(be)h(con\002gurable)e(back)o
+(ends)h(for)h(all)g(DTDs)h(that)f(follo)n(w)g(the)g(approach)e(and)i
+(ha)n(v)o(e)f(element)h(types)479 2278 y(for)g(tables)g(and)g(columns.)
+479 2428 y(XML)h(plays)f(the)g(role)g(of)g(a)g(con\002gurable)e
+(intermediate)h(format.)g(The)g(database)h(e)o(xtraction)e(function)h
+(can)h(be)479 2536 y(written)g(without)g(ha)n(ving)f(to)h(kno)n(w)f
+(the)h(details)h(of)f(typesetting;)f(the)h(back)o(ends)f(can)h(be)g
+(written)g(without)g(ha)n(ving)479 2644 y(to)h(kno)n(w)e(the)h(details)
+h(of)e(the)i(database.)479 2793 y(Of)g(course,)e(there)h(are)g
+(traditional)f(solutions.)g(One)h(can)g(de\002ne)g(an)g(ad)g(hoc)g
+(intermediate)e(te)o(xt)j(\002le)f(format.)f(This)479
+2901 y(disadv)n(antage)f(is)k(that)e(there)g(are)g(no)f(names)h(for)g
+(the)g(pieces)g(of)g(the)g(format,)f(and)h(that)g(such)g(formats)g
+(usually)f(lack)479 3009 y(of)h(documentation)d(because)j(of)g(this.)g
+(Another)f(solution)g(w)o(ould)h(be)g(to)g(ha)n(v)o(e)g(a)h(binary)e
+(representation,)e(either)j(as)479 3117 y(language-dependent)c(or)k
+(language-independent)14 b(structure)20 b(\(e)o(xample)e(of)i(the)g
+(latter)h(can)f(be)g(found)e(in)j(RPC)479 3225 y(implementations\).)d
+(The)i(disadv)n(antage)e(is)j(that)f(it)h(is)g(harder)e(to)i(vie)n(w)f
+(such)g(representations,)e(one)h(has)i(to)f(write)479
+3333 y(pretty)g(printers)f(for)h(this)g(purpose.)f(It)h(is)h(also)g
+(more)e(dif)n(\002cult)h(to)g(enter)g(test)h(data;)f(XML)g(is)h(plain)f
+(te)o(xt)g(that)h(can)f(be)479 3441 y(written)g(using)g(an)g(arbitrary)
+f(editor)g(\(Emacs)h(has)g(e)n(v)o(en)f(a)i(good)e(XML)h(mode,)f
+(PSGML\).)h(All)h(these)f(alternati)n(v)o(es)479 3549
+y(suf)n(fer)g(from)f(a)h(missing)g(structure)g(check)o(er)m(,)e(i.e.)i
+(the)h(programs)d(processing)h(these)h(formats)f(usually)h(do)g(not)479
+3657 y(check)g(the)g(input)f(\002le)i(or)f(input)g(object)f(in)i
+(detail;)f(XML)g(parsers)g(check)f(the)h(syntax)g(of)g(the)g(input)g
+(\(the)f(so-called)479 3765 y(well-formedness)f(check\),)h(and)h(the)g
+(adv)n(anced)e(parsers)i(lik)o(e)g(PXP)h(e)n(v)o(en)f(v)o(erify)e(that)
+j(the)f(structure)f(matches)h(the)479 3872 y(DTD)h(\(the)f(so-called)f
+(v)n(alidation\).)p Black 396 4022 a Ft(\225)p Black
+60 w Fv(XML)i(can)f(be)g(used)g(as)g(con\002gurable)e(communication)g
+(language.)g(A)i(fundamental)e(problem)h(of)h(e)n(v)o(ery)479
+4130 y(communication)e(is)j(that)f(sender)f(and)h(recei)n(v)o(er)f
+(must)h(follo)n(w)g(the)g(same)g(con)m(v)o(entions)e(about)h(the)h
+(language.)e(F)o(or)479 4238 y(data)i(e)o(xchange,)e(the)i(question)f
+(is)j(usually)d(which)h(data)g(records)f(and)h(\002elds)g(are)g(a)n(v)n
+(ailable,)g(ho)n(w)g(the)o(y)f(are)479 4346 y(syntactically)h
+(composed,)e(and)i(which)f(v)n(alues)h(are)g(possible)g(for)g(the)g(v)n
+(arious)f(\002elds.)h(Similar)h(questions)e(arise)479
+4454 y(for)h(te)o(xt)g(document)e(e)o(xchange.)g(XML)i(does)g(not)g
+(answer)g(these)g(problems)f(completely)-5 b(,)18 b(b)n(ut)i(it)h
+(reduces)e(the)479 4562 y(number)g(of)h(ambiguities)f(for)g(such)h(con)
+m(v)o(entions:)e(The)i(outlines)f(of)h(the)g(syntax)g(are)g
+(speci\002ed)g(by)g(the)g(DTD)g(\(b)n(ut)479 4669 y(not)g(necessarily)g
+(the)g(details\),)g(and)g(XML)g(introduces)e(canonical)h(names)h(for)g
+(the)g(components)e(of)i(documents)479 4777 y(such)g(that)h(it)f(is)i
+(simpler)d(to)i(describe)e(the)h(rest)h(of)f(the)g(syntax)g(and)f(the)h
+(semantics)h(informally)-5 b(.)p Black 3800 5278 a Fr(10)p
+Black eop
+%%Page: 11 11
+11 10 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black Black 396 579 a Ft(\225)p Black 60 w Fv(XML)f(is)g(a)g(data)f
+(storage)g(format.)f(Currently)-5 b(,)17 b(e)n(v)o(ery)h(softw)o(are)h
+(product)f(tends)h(to)h(use)f(its)i(o)n(wn)d(w)o(ay)i(to)f(store)h
+(data;)479 687 y(commercial)f(softw)o(are)h(often)f(does)h(not)g
+(describe)f(such)h(formats,)f(and)h(it)h(is)g(a)g(pain)e(to)i(inte)o
+(grate)e(such)h(softw)o(are)479 795 y(into)g(a)g(bigger)f(project.)f
+(XML)i(can)g(help)f(to)h(impro)o(v)o(e)e(this)j(situation)e(when)g(se)n
+(v)o(eral)g(applications)g(share)h(the)g(same)479 903
+y(syntax)g(of)g(data)g(\002les.)h(DTDs)f(are)g(then)g(neutral)g
+(instances)g(that)g(check)f(the)h(format)g(of)f(data)i(\002les)g
+(independent)c(of)479 1011 y(applications.)-2 1512 y
+Fx(1.2.)39 b(Highlights)e(of)i(XML)396 1692 y Fv(This)21
+b(section)f(e)o(xplains)f(man)o(y)g(of)h(the)g(features)f(of)h(XML,)g
+(b)n(ut)h(not)e(all,)i(and)f(some)g(features)f(not)h(in)g(detail.)g(F)o
+(or)g(a)396 1800 y(complete)f(description,)g(see)i(the)f(XML)g
+(speci\002cation)396 1908 y(\(http://www)-5 b(.w3.or)o
+(g/TR/1998/REC-xml-)o(19)o(98)o(02)o(10)o(.htm)o(l\).)-2
+2236 y Fp(1.2.1.)35 b(The)f(DTD)g(and)g(the)f(instance)396
+2404 y Fv(The)20 b(DTD)g(contains)g(v)n(arious)f(declarations;)g(in)h
+(general)f(you)h(can)g(only)f(use)i(a)f(feature)f(if)i(you)e(ha)n(v)o
+(e)h(pre)n(viously)396 2512 y(declared)f(it.)i(The)f(document)e
+(instance)i(\002le)h(may)e(contain)g(the)i(full)f(DTD,)g(b)n(ut)g(it)h
+(is)g(also)g(possible)f(to)g(split)h(the)f(DTD)396 2619
+y(into)g(an)g(internal)g(and)f(an)h(e)o(xternal)f(subset.)h(A)h
+(document)d(must)j(be)o(gin)e(as)h(follo)n(ws)g(if)h(the)f(full)g(DTD)g
+(is)h(included:)396 2800 y Fo(<)p Fq(?xml)44 b(version="1.0")f
+(encoding=")p Fn(Your)f(encoding)t Fq("?)p Fo(>)396 2897
+y(<)p Fq(!DOCTYPE)h Fn(root)i Fq([)486 2994 y Fn(Declarations)396
+3091 y Fq(])p Fo(>)396 3282 y Fv(These)20 b(declarations)f(are)h
+(called)g(the)h Fr(internal)e(subset)q Fv(.)i(Note)f(that)g(the)g
+(usage)g(of)g(entities)h(and)e(conditional)g(sections)396
+3390 y(is)i(restricted)f(within)g(the)g(internal)g(subset.)396
+3539 y(If)g(the)h(declarations)d(are)j(located)e(in)h(a)h(dif)n(ferent)
+e(\002le,)h(you)f(can)h(refer)g(to)g(this)h(\002le)g(as)g(follo)n(ws:)
+396 3720 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 3817 y(<)p Fq(!DOCTYPE)h
+Fn(root)i Fq(SYSTEM)e(")p Fn(file)h(name)p Fq(")p Fo(>)396
+4008 y Fv(The)20 b(declarations)f(in)h(the)h(\002le)f(are)h(called)f
+(the)g Fr(e)n(xternal)g(subset)q Fv(.)g(The)g(\002le)h(name)f(is)h
+(called)f(the)g Fr(system)h(identi\002er)r Fv(.)e(It)396
+4116 y(is)i(also)g(possible)f(to)g(refer)g(to)g(the)g(\002le)h(by)f(a)g
+(so-called)g Fr(public)f(identi\002er)r Fv(,)g(b)n(ut)i(most)f(XML)g
+(applications)f(w)o(on')o(t)g(use)396 4223 y(this)i(feature.)396
+4373 y(Y)-9 b(ou)20 b(can)g(also)g(specify)g(both)f(internal)h(and)f(e)
+o(xternal)g(subsets.)i(In)e(this)i(case,)g(the)f(declarations)f(of)h
+(both)f(subsets)i(are)396 4481 y(mix)o(ed,)e(and)h(if)g(there)g(are)g
+(con\003icts,)g(the)g(declaration)f(of)h(the)g(internal)f(subset)i(o)o
+(v)o(errides)d(those)i(of)g(the)g(e)o(xternal)396 4589
+y(subset)h(with)f(the)g(same)h(name.)e(This)h(looks)g(as)h(follo)n(ws:)
+396 4769 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 4866 y(<)p Fq(!DOCTYPE)h
+Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)p
+Black 3800 5278 a Fr(11)p Black eop
+%%Page: 12 12
+12 11 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 486 579 a Fn(Declarations)396 676 y Fq(])p Fo(>)396
+909 y Fv(The)f(XML)g(declaration)f(\(the)h(string)g(be)o(ginning)d
+(with)k Fo(<)p Fq(?xml)e Fv(and)h(ending)f(at)i Fq(?)p
+Fo(>)p Fv(\))f(should)f(specify)g(the)h(encoding)396
+1016 y(of)g(the)g(\002le.)h(Common)e(v)n(alues)h(are)g(UTF-8,)f(and)h
+(the)g(ISO-8859)e(series)j(of)f(character)f(sets.)i(Note)f(that)g(e)n
+(v)o(ery)f(\002le)396 1124 y(parsed)h(by)f(the)i(XML)f(processor)f(can)
+h(be)o(gin)f(with)h(an)g(XML)h(declaration)d(and)i(that)g(e)n(v)o(ery)f
+(\002le)i(may)e(ha)n(v)o(e)h(its)h(o)n(wn)396 1232 y(encoding.)396
+1382 y(The)f(name)g(of)g(the)g(root)f(element)h(must)g(be)g(mentioned)f
+(directly)g(after)h(the)g Fq(DOCTYPE)g Fv(string.)f(This)i(means)e
+(that)i(a)396 1490 y(full)f(document)f(instance)g(looks)h(lik)o(e)396
+1670 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p
+Fn(Your)f(encoding)t Fq("?)p Fo(>)396 1767 y(<)p Fq(!DOCTYPE)h
+Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)486
+1864 y Fn(Declarations)396 1961 y Fq(])p Fo(>)396 2156
+y(<)p Fn(root)p Fo(>)486 2253 y Fn(inner)g(contents)396
+2350 y Fo(<)p Fq(/)p Fn(root)p Fo(>)-2 2802 y Fp(1.2.2.)35
+b(Reser)q(ved)h(c)o(haracter)n(s)396 2970 y Fv(Some)20
+b(characters)f(are)i(generally)d(reserv)o(ed)h(to)h(indicate)g(markup)e
+(such)i(that)g(the)o(y)g(cannot)f(be)h(used)g(for)g(character)396
+3078 y(data.)g(These)g(characters)f(are)h Fm(<)p Fv(,)h
+Fm(>)p Fv(,)f(and)f(&.)h(Furthermore,)e(single)i(and)g(double)e(quotes)
+i(are)g(sometimes)g(reserv)o(ed.)396 3186 y(If)g(you)g(w)o(ant)g(to)g
+(include)f(such)h(a)h(character)e(as)i(character)m(,)d(write)j(it)f(as)
+h(follo)n(ws:)p Black 396 3473 a Ft(\225)p Black 60 w
+Fq(<)f Fv(instead)g(of)g Fm(<)p Black 396 3581 a Ft(\225)p
+Black 60 w Fq(>)g Fv(instead)g(of)g Fm(>)p Black 396
+3689 a Ft(\225)p Black 60 w Fq(&)g Fv(instead)g(of)g(&)p
+Black 396 3797 a Ft(\225)p Black 60 w Fq(')g Fv(instead)g(of)g(')p
+Black 396 3905 a Ft(\225)p Black 60 w Fq(")g Fv(instead)g(of)g(")
+396 4054 y(All)h(other)e(characters)h(are)g(free)g(in)g(the)g(document)
+e(instance.)i(It)g(is)i(possible)d(to)i(include)e(a)i(character)e(by)g
+(its)j(position)396 4162 y(in)f(the)f(Unicode)f(alphabet:)396
+4342 y Fq(&#)p Fn(n)p Fq(;)396 4533 y Fv(where)h Fl(n)g
+Fv(is)i(the)e(decimal)f(number)g(of)h(the)g(character)-5
+b(.)19 b(Alternati)n(v)o(ely)-5 b(,)18 b(you)h(can)h(specify)g(the)g
+(character)f(by)h(its)396 4641 y(he)o(xadecimal)e(number:)396
+4822 y Fq(&#x)p Fn(n)p Fq(;)p Black 3800 5278 a Fr(12)p
+Black eop
+%%Page: 13 13
+13 12 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(In)f(the)g(scope)g(of)g(declarations,)f(the)h
+(character)f(\045)i(is)g(no)f(longer)f(free.)g(T)-7 b(o)20
+b(include)g(it)h(as)f(character)m(,)f(you)g(must)h(use)396
+687 y(the)g(notations)g Fq(%)g Fv(or)f Fq(%)p
+Fv(.)396 836 y(Note)h(that)h(besides)f(<,)g(>,)g(&,)f
+(',)g(and)h(")f(there)h(are)g(no)g(prede\002nes)f(character)g
+(entities.)h(This)396 944 y(is)h(dif)n(ferent)e(from)g(HTML)h(which)g
+(de\002nes)g(a)g(list)i(of)d(characters)h(that)g(can)g(be)g(referenced)
+e(by)i(name)f(\(e.g.)h(ä)396 1052 y(for)g(\344\);)g(ho)n(we)n(v)o
+(er)m(,)e(if)i(you)g(prefer)e(named)i(characters,)f(you)g(can)h
+(declare)f(such)h(entities)h(yourself)e(\(see)h(belo)n(w\).)-2
+1422 y Fp(1.2.3.)35 b(Elements)g(and)f(ELEMENT)e(dec)n(larations)396
+1589 y Fv(Elements)20 b(structure)f(the)h(document)f(instance)g(in)i(a)
+f(hierarchical)f(w)o(ay)-5 b(.)20 b(There)f(is)i(a)g(top-le)n(v)o(el)d
+(element,)i(the)g Fr(r)l(oot)396 1697 y(element)q Fv(,)g(which)g
+(contains)g(a)g(sequence)f(of)h(inner)g(elements)f(and)h(character)f
+(sections.)h(The)g(inner)f(elements)h(are)396 1805 y(structured)f(in)h
+(the)f(same)h(w)o(ay)-5 b(.)20 b(Ev)o(ery)e(element)h(has)h(an)g
+Fr(element)f(type)p Fv(.)h(The)f(be)o(ginning)f(of)h(the)h(element)f
+(is)i(indicated)396 1913 y(by)f(a)h Fr(start)g(ta)o(g)p
+Fv(,)e(written)396 2093 y Fo(<)p Fn(element-type)p Fo(>)396
+2284 y Fv(and)h(the)g(element)g(continues)f(until)h(the)g
+(corresponding)d Fr(end)i(ta)o(g)h Fv(is)h(reached:)396
+2465 y Fo(<)p Fq(/)p Fn(element-type)p Fo(>)396 2655
+y Fv(In)f(XML,)f(it)i(is)f(not)g(allo)n(wed)f(to)h(omit)f(start)i(or)e
+(end)g(tags,)h(e)n(v)o(en)f(if)h(the)g(DTD)g(w)o(ould)f(permit)g(this.)
+h(Note)g(that)g(there)f(are)396 2763 y(no)h(special)g(rules)g(ho)n(w)g
+(to)g(interpret)g(spaces)g(or)g(ne)n(wlines)g(near)f(start)i(or)f(end)g
+(tags;)g(all)h(spaces)f(and)g(ne)n(wlines)g(count.)396
+2913 y(Ev)o(ery)f(element)h(type)f(must)i(be)f(declared)f(before)f(it)j
+(can)f(be)g(used.)g(The)g(declaration)f(consists)h(of)g(tw)o(o)h
+(parts:)f(the)396 3021 y(ELEMENT)f(declaration)f(describes)h(the)h
+(content)f(model,)f(i.e.)i(which)f(inner)g(elements)g(are)h(allo)n
+(wed;)f(the)h(A)-9 b(TTLIST)396 3129 y(declaration)19
+b(describes)h(the)g(attrib)n(utes)g(of)g(the)g(element.)396
+3278 y(An)g(element)g(can)g(simply)g(allo)n(w)g(e)n(v)o(erything)e(as)i
+(content.)f(This)i(is)g(written:)396 3458 y Fo(<)p Fq(!ELEMENT)43
+b Fn(name)i Fq(ANY)p Fo(>)396 3649 y Fv(On)20 b(the)h(opposite,)e(an)h
+(element)f(can)h(be)g(forced)f(to)i(be)f(empty;)f(declared)g(by:)396
+3829 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(EMPTY)p Fo(>)396
+4020 y Fv(Note)20 b(that)h(there)e(is)j(an)e(abbre)n(viated)e(notation)
+h(for)g(empty)g(element)h(instances:)g Fo(<)p Fn(name)p
+Fq(/)p Fo(>)p Fv(.)396 4170 y(There)g(are)g(tw)o(o)g(more)g
+(sophisticated)f(forms)g(of)h(declarations:)f(so-called)h
+Fr(mixed)g(declar)o(ations)p Fv(,)e(and)i Fr(r)m(e)m(gular)396
+4278 y(e)n(xpr)m(essions)p Fv(.)g(An)h(element)e(with)i(mix)o(ed)e
+(content)g(contains)g(character)g(data)h(interspersed)f(with)i(inner)e
+(elements,)396 4386 y(and)h(the)g(set)h(of)f(allo)n(wed)g(inner)f
+(elements)h(can)g(be)g(speci\002ed.)g(In)f(contrast)h(to)g(this,)h(a)g
+(re)o(gular)d(e)o(xpression)396 4494 y(declaration)h(does)h(not)g(allo)
+n(w)g(character)f(data,)h(b)n(ut)g(the)g(inner)f(elements)h(can)g(be)g
+(described)f(by)h(the)g(more)g(po)n(werful)396 4601 y(means)g(of)g(re)o
+(gular)f(e)o(xpressions.)396 4751 y(A)i(declaration)e(for)g(mix)o(ed)g
+(content)g(looks)h(as)h(follo)n(ws:)p Black 3800 5278
+a Fr(13)p Black eop
+%%Page: 14 14
+14 13 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(\(#PCDATA)e(|)i
+Fn(element)1892 609 y Fk(1)1962 579 y Fq(|)g(...)f(|)h
+Fn(element)2636 609 y Fk(n)2707 579 y Fq(\)*)p Fo(>)396
+770 y Fv(or)20 b(if)h(you)e(do)h(not)g(w)o(ant)g(to)g(allo)n(w)g(an)o
+(y)g(inner)f(element,)h(simply)396 950 y Fo(<)p Fq(!ELEMENT)43
+b Fn(name)i Fq(\(#PCDATA\))p Fo(>)396 1279 y Fj(Example)479
+1426 y Fi(If)19 b(element)g(type)g Fh(q)g Fi(is)g(declared)h(as)479
+1596 y Fh(<!ELEMENT)44 b(q)c(\(#PCDATA)k(|)c(r)h(|)g(s\)*>)479
+1776 y Fi(this)19 b(is)f(a)h(le)o(gal)g(instance:)479
+1947 y Fh(<q>This)43 b(is)e(character)j(data<r></r>with)h(<s></s>inner)
+g(elements</q>)479 2127 y Fi(But)19 b(this)g(is)f(ille)o(gal)g(because)
+i Fh(t)f Fi(has)h(not)f(been)g(enumerated)i(in)e(the)g(declaration:)479
+2297 y Fh(<q>This)43 b(is)e(character)j(data<r></r>with)h(<t></t>inner)
+g(elements</q>)396 2571 y Fv(The)20 b(other)f(form)h(uses)g(a)h(re)o
+(gular)e(e)o(xpression)f(to)j(describe)e(the)h(possible)g(contents:)396
+2752 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(regexp)p Fo(>)396
+2942 y Fv(The)20 b(follo)n(wing)f(well-kno)n(wn)f(re)o(ge)o(xp)g
+(operators)h(are)h(allo)n(wed:)p Black 396 3299 a Ft(\225)p
+Black 60 w Fn(element-name)p Black 396 3407 a Ft(\225)p
+Black 60 w Fq(\()p Fn(subexpr)839 3437 y Fk(1)910 3407
+y Fq(,)g Fv(...)g Fq(,)45 b Fn(subexpr)1463 3437 y Fk(n)1533
+3407 y Fq(\))p Black 396 3515 a Ft(\225)p Black 60 w
+Fq(\()p Fn(subexpr)839 3545 y Fk(1)910 3515 y Fq(|)20
+b Fv(...)g Fq(|)45 b Fn(subexpr)1463 3545 y Fk(n)1533
+3515 y Fq(\))p Black 396 3623 a Ft(\225)p Black 60 w
+Fn(subexpr)s Fq(*)p Black 396 3731 a Ft(\225)p Black
+60 w Fn(subexpr)s Fq(+)p Black 396 3839 a Ft(\225)p Black
+60 w Fn(subexpr)s Fq(?)396 3989 y Fv(The)20 b Fq(,)h
+Fv(operator)d(indicates)i(a)h(sequence)e(of)h(sub-models,)e(the)i
+Fq(|)h Fv(operator)d(describes)i(alternati)n(v)o(e)f(sub-models.)f(The)
+396 4096 y Fq(*)j Fv(indicates)f(zero)f(or)h(more)g(repetitions,)f(and)
+g Fq(+)i Fv(one)f(or)f(more)h(repetitions.)f(Finally)-5
+b(,)19 b Fq(?)i Fv(can)f(be)g(used)g(for)f(optional)396
+4204 y(sub-models.)g(As)i(atoms)f(the)g(re)o(ge)o(xp)e(can)i(contain)f
+(names)h(of)g(elements;)g(note)g(that)g(it)h(is)g(not)f(allo)n(wed)f
+(to)i(include)396 4312 y Fq(#PCDATA)p Fv(.)396 4462 y(The)f(e)o(xact)g
+(syntax)f(of)h(the)g(re)o(gular)f(e)o(xpressions)g(is)i(rather)e
+(strange.)h(This)g(can)g(be)g(e)o(xplained)f(best)h(by)g(a)g(list)i(of)
+396 4570 y(constraints:)p Black 396 4802 a Ft(\225)p
+Black 60 w Fv(The)e(outermost)f(e)o(xpression)g(must)h(not)g(be)g
+Fn(element-name)p Fv(.)p Black 3800 5278 a Fr(14)p Black
+eop
+%%Page: 15 15
+15 14 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 479 579 a(Ille)m(gal:)e Fq(<!ELEMENT)43 b(x)i(y>)p
+Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq(<!ELEMENT)43
+b(x)i(\(y\)>)p Fv(.)p Black 396 728 a Ft(\225)p Black
+60 w Fv(F)o(or)20 b(the)g(unary)f(operators)g Fn(subexpr)s
+Fq(*)p Fv(,)g Fn(subexpr)s Fq(+)p Fv(,)g(and)g Fn(subexpr)s
+Fq(?)p Fv(,)g(the)h Fn(subexpr)i Fv(must)f(not)f(be)g(again)f(an)479
+836 y(unary)g(operator)-5 b(.)479 986 y Fr(Ille)m(gal:)19
+b Fq(<!ELEMENT)43 b(x)i(y**>)p Fv(;)20 b(this)h(must)f(be)g(written)g
+(as)h Fq(<!ELEMENT)44 b(x)g(\(y*\)*>)p Fv(.)p Black 396
+1135 a Ft(\225)p Black 60 w Fv(Between)21 b Fq(\))f Fv(and)g(one)f(of)h
+(the)h(unary)d(operatory)g Fq(*)p Fv(,)j Fq(+)p Fv(,)f(or)g
+Fq(?)p Fv(,)g(there)g(must)g(not)g(be)g(whitespace.)479
+1285 y Fr(Ille)m(gal:)f Fq(<!ELEMENT)43 b(x)i(\(y|z\))f(*>)p
+Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq(<!ELEMENT)43
+b(x)i(\(y|z\)*>)p Fv(.)p Black 396 1434 a Ft(\225)p Black
+60 w Fv(There)20 b(is)h(the)f(additional)f(constraint)g(that)h(the)h
+(right)e(parenthsis)g(must)i(be)f(contained)e(in)j(the)f(same)g(entity)
+g(as)h(the)479 1542 y(left)g(parenthesis;)e(see)i(the)f(section)g
+(about)f(parsed)h(entities)g(belo)n(w)-5 b(.)396 1733
+y(Note)20 b(that)g(there)g(is)h(another)e(restriction)g(on)h(re)o
+(gular)e(e)o(xpressions)h(which)h(must)g(be)g(deterministic.)f(This)h
+(means)g(that)396 1841 y(the)g(parser)g(must)g(be)g(able)g(to)h(see)g
+(by)e(looking)g(at)i(the)f(ne)o(xt)f(tok)o(en)h(which)f(alternati)n(v)o
+(e)g(is)i(actually)f(used,)g(or)f(whether)396 1949 y(the)h(repetition)f
+(stops.)i(The)f(reason)f(for)g(this)i(is)g(simply)f(compatability)f
+(with)h(SGML)g(\(there)g(is)h(no)f(intrinsic)f(reason)396
+2057 y(for)h(this)h(rule;)e(XML)i(can)f(li)n(v)o(e)g(without)f(this)i
+(restriction\).)396 2302 y Fj(Example)479 2449 y Fi(The)e(elements)g
+(are)g(declared)h(as)f(follo)n(ws:)479 2620 y Fh(<!ELEMENT)44
+b(q)c(\(r?,)i(\(s)f(|)g(t\)+\)>)479 2707 y(<!ELEMENT)j(r)c
+(\(#PCDATA\)>)479 2795 y(<!ELEMENT)k(s)c(EMPTY>)479 2882
+y(<!ELEMENT)k(t)c(\(q)i(|)e(r\)>)479 3062 y Fi(This)19
+b(is)f(a)h(le)o(gal)g(instance:)479 3233 y Fh(<q><r>Some)44
+b(characters</r><s/><)q(/q>)479 3413 y Fi(\(Note:)19
+b Fg(<)p Fh(s/)p Fg(>)g Fi(is)g(an)g(abbre)n(viation)h(for)f
+Fg(<)p Fh(s)p Fg(><)p Fh(/s)p Fg(>)p Fi(.\))g(It)f(w)o(ould)i(be)f
+(ille)o(gal)f(to)h(lea)o(v)o(e)g Fh(<s/>)h Fi(out)f(because)h(at)f
+(least)f(one)479 3510 y(instance)i(of)f Fh(s)g Fi(or)g
+Fh(t)g Fi(must)g(be)g(present.)g(It)f(w)o(ould)i(be)f(ille)o(gal,)f
+(too,)h(if)f(characters)i(e)o(xisted)f(outside)h(the)e
+Fh(r)i Fi(element;)f(the)g(only)479 3607 y(e)o(xception)h(is)f(white)g
+(space.)g(\226)g(This)f(is)h(le)o(gal,)f(too:)479 3778
+y Fh(<q><s/><t><q><s/><)q(/q>)q(</t)q(></)q(q>)-2 4230
+y Fp(1.2.4.)35 b(Attrib)n(ute)e(lists)h(and)g(A)-11 b(TTLIST)34
+b(dec)n(larations)396 4398 y Fv(Elements)20 b(may)g(ha)n(v)o(e)f
+(attrib)n(utes.)h(These)g(are)g(put)g(into)g(the)g(start)h(tag)f(of)g
+(an)g(element)g(as)h(follo)n(ws:)396 4578 y Fo(<)p Fn(element-name)43
+b(attribute)1444 4608 y Fk(1)1469 4578 y Fq(=")p Fn(value)1784
+4608 y Fk(1)1810 4578 y Fq(")i(...)f Fn(attribute)2484
+4608 y Fk(n)2509 4578 y Fq(=")p Fn(value)2824 4608 y
+Fk(n)2850 4578 y Fq(")p Fo(>)396 4769 y Fv(Instead)20
+b(of)g Fq(")p Fn(value)1017 4799 y Fk(k)1043 4769 y Fq(")g
+Fv(it)h(is)g(also)g(possible)f(to)g(use)g(single)g(quotes)g(as)h(in)f
+Fq(')p Fn(value)2817 4799 y Fk(k)2843 4769 y Fq(')p Fv(.)g(Note)h(that)
+f(you)f(cannot)g(use)396 4877 y(double)g(quotes)h(literally)g(within)g
+(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(if)h(double)d(quotes)i(are)
+g(the)g(delimiters;)g(the)g(same)p Black 3800 5278 a
+Fr(15)p Black eop
+%%Page: 16 16
+16 15 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(applies)f(to)h(single)f(quotes.)f(Y)-9
+b(ou)20 b(can)g(generally)e(not)i(use)g Fm(<)h Fv(and)e(&)i(as)g
+(characters)e(in)h(attrib)n(ute)g(v)n(alues.)g(It)g(is)396
+687 y(possible)g(to)h(include)e(the)h(paraphrases)e(<,)j(>,)f
+(&,)f(',)g(and)h(")f(\(and)g(an)o(y)g(other)h(reference)e
+(to)j(a)396 795 y(general)e(entity)h(as)h(long)f(as)g(the)h(entity)f
+(is)h(not)e(de\002ned)h(by)f(an)i(e)o(xternal)d(\002le\))j(as)g(well)g
+(as)g(&#)p Fl(n)p Fv(;.)396 944 y(Before)f(you)f(can)h(use)h(an)f
+(attrib)n(ute)g(you)f(must)h(declare)g(it.)g(An)g(A)-9
+b(TTLIST)20 b(declaration)e(looks)i(as)h(follo)n(ws:)396
+1124 y Fo(<)p Fq(!ATTLIST)43 b Fn(element-name)845 1222
+y(attribute-name)f(attribute-type)h(attribute-default)845
+1319 y Fq(...)845 1416 y Fn(attribute-name)f(attribute-type)h
+(attribute-default)396 1513 y Fo(>)396 1704 y Fv(There)20
+b(are)g(a)g(lot)h(of)f(types,)f(b)n(ut)i(most)f(important)f(are:)p
+Black 396 2061 a Ft(\225)p Black 60 w Fq(CDATA)p Fv(:)h(Ev)o(ery)f
+(string)h(is)h(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)p
+Black 396 2169 a Ft(\225)p Black 60 w Fq(NMTOKEN)p Fv(:)g(Ev)o(ery)f
+(nametok)o(en)f(is)j(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)g
+(Nametok)o(ens)f(consist)h(\(mainly\))f(of)g(letters,)479
+2277 y(digits,)h(.,)h(:,)f(-,)g(_)h(in)f(arbitrary)f(order)-5
+b(.)p Black 396 2385 a Ft(\225)p Black 60 w Fq(NMTOKENS)p
+Fv(:)20 b(A)g(space-separated)f(list)i(of)f(nametok)o(ens)e(is)k(allo)n
+(wed)d(as)i(attrib)n(ute)f(v)n(alue.)396 2534 y(The)g(most)g
+(interesting)g(def)o(ault)f(declarations)g(are:)p Black
+396 2767 a Ft(\225)p Black 60 w Fq(#REQUIRED)p Fv(:)h(The)f(attrib)n
+(ute)h(must)g(be)h(speci\002ed.)p Black 396 2874 a Ft(\225)p
+Black 60 w Fq(#IMPLIED)p Fv(:)e(The)h(attrib)n(ute)f(can)g(be)h
+(speci\002ed)f(b)n(ut)h(also)g(can)f(be)h(left)g(out.)f(The)g
+(application)g(can)g(\002nd)g(out)h(whether)479 2982
+y(the)g(attrib)n(ute)g(w)o(as)h(present)f(or)g(not.)p
+Black 396 3090 a Ft(\225)p Black 60 w Fq(")p Fn(value)p
+Fq(")g Fv(or)f Fq(')p Fn(value)p Fq(')p Fv(:)h(This)g(particular)e(v)n
+(alue)i(is)g(used)g(as)h(def)o(ault)e(if)h(the)g(attrib)n(ute)g(is)g
+(omitted)g(in)g(the)g(element.)396 3378 y Fj(Example)479
+3525 y Fi(This)f(is)f(a)h(v)n(alid)g(attrib)o(ute)g(declaration)g(for)g
+(element)g(type)h Fh(r)p Fi(:)479 3695 y Fh(<!ATTLIST)44
+b(r)883 3782 y(x)c(CDATA)164 b(#REQUIRED)883 3870 y(y)40
+b(NMTOKEN)84 b(#IMPLIED)883 3957 y(z)40 b(NMTOKENS)k("one)d(two)h
+(three">)479 4137 y Fi(This)19 b(means)g(that)g Fh(x)g
+Fi(is)g(a)g(required)g(attrib)o(ute)f(that)h(cannot)h(be)f(left)g(out,)
+f(while)h Fh(y)g Fi(and)h Fh(z)f Fi(are)g(optional.)g(The)g(XML)g
+(parser)479 4235 y(indicates)h(the)f(application)g(whether)h
+Fh(y)f Fi(is)f(present)i(or)f(not,)f(b)o(ut)h(if)f Fh(z)h
+Fi(is)g(missing)g(the)g(def)o(ault)h(v)n(alue)f("one)h(tw)o(o)f(three")
+h(is)479 4332 y(returned)g(automatically)-5 b(.)479 4470
+y(This)19 b(is)f(a)h(v)n(alid)g(e)o(xample)h(of)f(these)g(attrib)o
+(utes:)479 4641 y Fh(<r)41 b(x="He)h(said:)h("I)f(don't)g(like)g
+(quotes!"")j(y='1'>)p Black 3798 5278 a Fr(16)p
+Black eop
+%%Page: 17 17
+17 16 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 583 a Fp(1.2.5.)35 b(P)l(ar)n(sed)g(entities)396
+751 y Fv(Elements)20 b(describe)f(the)i(logical)e(structure)h(of)g(the)
+g(document,)e(while)i Fr(entities)g Fv(determine)f(the)h(physical)g
+(structure.)396 859 y(Entities)h(are)f(the)g(pieces)g(of)g(te)o(xt)g
+(the)g(parser)g(operates)f(on,)h(mostly)g(\002les)h(and)f(macros.)f
+(Entities)h(may)g(be)g Fr(par)o(sed)i Fv(in)396 967 y(which)e(case)h
+(the)f(parser)f(reads)h(the)g(te)o(xt)h(and)e(interprets)g(it)i(as)g
+(XML)g(markup,)d(or)i Fr(unpar)o(sed)h Fv(which)e(simply)h(means)396
+1075 y(that)h(the)f(data)g(of)g(the)g(entity)g(has)g(a)h(foreign)d
+(format)h(\(e.g.)h(a)g(GIF)h(icon\).)396 1224 y(If)f(the)g(parsed)f
+(entity)g(is)i(going)e(to)h(be)g(used)f(as)i(part)e(of)h(the)g(DTD,)g
+(it)g(is)h(called)f(a)g Fr(par)o(ameter)f(entity)p Fv(.)h(Y)-9
+b(ou)19 b(can)h(declare)396 1332 y(a)h(parameter)e(entity)g(with)i(a)f
+(\002x)o(ed)g(te)o(xt)g(as)h(content)e(by:)396 1512 y
+Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g Fq(")p Fn(value)p
+Fq(")p Fo(>)396 1703 y Fv(W)m(ithin)20 b(the)h(DTD,)f(you)f(can)h
+Fr(r)m(efer)h(to)f Fv(this)h(entity)-5 b(,)19 b(i.e.)i(read)e(the)h(te)
+o(xt)g(of)g(the)h(entity)-5 b(,)19 b(by:)396 1883 y Fq(\045)p
+Fn(name)p Fq(;)396 2074 y Fv(Such)h(entities)h(beha)n(v)o(e)e(lik)o(e)h
+(macros,)f(i.e.)i(when)e(the)o(y)h(are)g(referred)e(to,)i(the)g(macro)g
+(te)o(xt)g(is)h(inserted)e(and)h(read)396 2182 y(instead)g(of)g(the)g
+(original)f(te)o(xt.)396 2478 y Fj(Example)479 2625 y
+Fi(F)o(or)g(e)o(xample,)g(you)h(can)f(declare)h(tw)o(o)f(elements)g
+(with)f(the)h(same)h(content)f(model)h(by:)479 2795 y
+Fh(<!ENTITY)43 b(\045)e(model)h("a)f(|)g(b)g(|)f(c">)479
+2882 y(<!ELEMENT)k(x)c(\(\045model;\)>)479 2970 y(<!ELEMENT)k(y)c
+(\(\045model;\)>)396 3202 y Fv(If)20 b(the)h(contents)e(of)h(the)g
+(entity)g(are)g(gi)n(v)o(en)f(as)i(string)f(constant,)f(the)h(entity)g
+(is)h(called)f(an)g Fr(internal)g Fv(entity)-5 b(.)19
+b(It)i(is)g(also)396 3310 y(possible)f(to)h(name)e(a)i(\002le)g(to)f
+(be)g(used)g(as)h(content)e(\(an)h Fr(e)n(xternal)g Fv(entity\):)396
+3490 y Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g Fq(SYSTEM)g(")p
+Fn(file)g(name)p Fq(")p Fo(>)396 3681 y Fv(There)20 b(are)g(some)g
+(restrictions)f(for)h(parameter)f(entities:)p Black 396
+4038 a Ft(\225)p Black 60 w Fv(If)h(the)h(internal)e(parameter)g
+(entity)g(contains)h(the)g(\002rst)h(tok)o(en)e(of)h(a)h(declaration)e
+(\(i.e.)g Fo(<)p Fq(!)p Fv(\),)h(it)h(must)f(also)h(contain)479
+4146 y(the)f(last)i(tok)o(en)d(of)h(the)g(declaration,)e(i.e.)j(the)f
+Fo(>)p Fv(.)g(This)g(means)g(that)h(the)f(entity)g(either)g(contains)f
+(a)i(whole)e(number)479 4254 y(of)h(complete)f(declarations,)g(or)h
+(some)g(te)o(xt)g(from)f(the)h(middle)g(of)g(one)f(declaration.)479
+4404 y Fr(Ille)m(gal:)479 4542 y Fq(<!ENTITY)44 b(\045)g(e)h("\(a)f(|)h
+(b)g(|)f(c\)>">)479 4639 y(<!ELEMENT)g(x)g(\045e;)479
+4789 y Fv(Because)21 b Fo(<)p Fq(!)f Fv(is)h(contained)e(in)h(the)g
+(main)g(entity)-5 b(,)19 b(and)h(the)g(corresponding)d
+Fo(>)j Fv(is)h(contained)e(in)h(the)h(entity)e Fq(e)p
+Fv(.)p Black 3797 5278 a Fr(17)p Black eop
+%%Page: 18 18
+18 17 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black Black 396 579 a Ft(\225)p Black 60 w Fv(If)f(the)h(internal)e
+(parameter)g(entity)g(contains)h(a)h(left)f(paranthesis,)f(it)i(must)f
+(also)h(contain)e(the)h(corresponding)d(right)479 687
+y(paranthesis.)479 836 y Fr(Ille)m(gal:)479 975 y Fq(<!ENTITY)44
+b(\045)g(e)h("\(a)f(|)h(b)g(|)f(c">)479 1072 y(<!ELEMENT)g(x)g
+(\045e;\)>)479 1222 y Fv(Because)21 b Fq(\()f Fv(is)h(contained)e(in)h
+(the)g(entity)g Fq(e)p Fv(,)h(and)e(the)i(corresponding)16
+b Fq(\))21 b Fv(is)g(contained)e(in)h(the)g(main)g(entity)-5
+b(.)p Black 396 1371 a Ft(\225)p Black 60 w Fv(When)20
+b(reading)e(te)o(xt)i(from)f(an)g(entity)-5 b(,)19 b(the)h(parser)f
+(automatically)f(inserts)i(one)g(space)f(character)g(before)f(the)i
+(entity)479 1479 y(te)o(xt)g(and)g(one)g(space)g(character)f(after)h
+(the)g(entity)g(te)o(xt.)f(Ho)n(we)n(v)o(er)m(,)f(this)j(rule)f(is)h
+(not)f(applied)f(within)h(the)g(de\002nition)479 1587
+y(of)g(another)f(entity)-5 b(.)479 1736 y Fr(Le)m(gal:)479
+1875 y Fq(<!ENTITY)44 b(\045)g(suffix)g("gif">)479 1972
+y(<!ENTITY)g(iconfile)f('icon.\045suffix;'>)479 2121
+y Fv(Because)21 b Fq(\045suffix;)e Fv(is)i(referenced)d(within)i(the)g
+(de\002nition)f(te)o(xt)h(for)g Fq(iconfile)p Fv(,)f(no)h(additional)f
+(spaces)h(are)479 2229 y(added.)479 2379 y Fr(Ille)m(gal:)479
+2517 y Fq(<!ENTITY)44 b(\045)g(suffix)g("test">)479 2615
+y(<!ELEMENT)g(x.\045suffix;)f(ANY>)479 2764 y Fv(Because)21
+b Fq(\045suffix;)e Fv(is)i(referenced)d(outside)i(the)g(de\002nition)f
+(te)o(xt)h(of)g(another)f(entity)-5 b(,)19 b(the)h(parser)g(replaces)
+479 2872 y Fq(\045suffix;)g Fv(by)f Fn(space)p Fq(test)p
+Fn(space)p Fv(.)479 3021 y Fr(Ille)m(gal:)479 3160 y
+Fq(<!ENTITY)44 b(\045)g(e)h("\(a)f(|)h(b)g(|)f(c\)">)479
+3257 y(<!ELEMENT)g(x)g(\045e;*>)479 3407 y Fv(Because)21
+b(there)e(is)j(a)e(whitespace)g(between)f Fq(\))i Fv(and)e
+Fq(*)p Fv(,)i(which)e(is)i(ille)o(gal.)p Black 396 3556
+a Ft(\225)p Black 60 w Fv(An)f(e)o(xternal)f(parameter)g(entity)h(must)
+g(al)o(w)o(ays)h(consist)f(of)g(a)h(whole)e(number)g(of)h(complete)f
+(declarations.)p Black 396 3664 a Ft(\225)p Black 60
+w Fv(In)h(the)g(internal)g(subset)g(of)g(the)g(DTD,)g(a)h(reference)d
+(to)j(a)f(parameter)f(entity)h(\(internal)f(or)h(e)o(xternal\))e(is)k
+(only)479 3772 y(allo)n(wed)e(at)h(positions)e(where)h(a)g(ne)n(w)g
+(declaration)f(can)h(start.)396 3963 y(If)g(the)f(parsed)g(entity)g(is)
+h(going)e(to)i(be)f(used)g(in)h(the)f(document)e(instance,)i(it)h(is)h
+(called)e(a)h Fr(g)o(ener)o(al)e(entity)p Fv(.)h(Such)g(entities)396
+4071 y(can)h(be)g(used)g(as)h(abbre)n(viations)d(for)i(frequent)e
+(phrases,)i(or)g(to)g(include)f(e)o(xternal)g(\002les.)i(Internal)e
+(general)g(entities)i(are)396 4179 y(declared)e(as)i(follo)n(ws:)396
+4359 y Fo(<)p Fq(!ENTITY)44 b Fn(name)g Fq(")p Fn(value)p
+Fq(")p Fo(>)396 4550 y Fv(External)19 b(general)g(entities)i(are)f
+(declared)f(this)i(w)o(ay:)396 4730 y Fo(<)p Fq(!ENTITY)44
+b Fn(name)g Fq(SYSTEM)g(")p Fn(file)g(name)p Fq(")p Fo(>)p
+Black 3800 5278 a Fr(18)p Black eop
+%%Page: 19 19
+19 18 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fv(References)f(to)g(general)f(entities)i(are)f
+(written)g(as:)396 759 y Fq(&)p Fn(name)p Fq(;)396 950
+y Fv(The)g(main)g(dif)n(ference)e(between)h(parameter)g(and)h(general)f
+(entities)h(is)i(that)e(the)g(former)f(are)h(only)f(recognized)f(in)j
+(the)396 1058 y(DTD)g(and)e(that)i(the)f(latter)g(are)g(only)g
+(recognized)e(in)i(the)g(document)e(instance.)i(As)h(the)f(DTD)g(is)i
+(parsed)d(before)g(the)396 1166 y(document,)f(the)i(parameter)f
+(entities)i(are)f(e)o(xpanded)d(\002rst;)k(for)f(e)o(xample)f(it)i(is)g
+(possible)f(to)g(use)h(the)f(content)f(of)h(a)396 1274
+y(parameter)f(entity)h(as)h(the)f(name)g(of)f(a)i(general)e(entity:)h
+Fq(&\045name;;)2557 1241 y Ff(1)2580 1274 y Fv(.)396
+1423 y(General)g(entities)g(must)h(respect)e(the)i(element)e(hierarchy)
+-5 b(.)17 b(This)k(means)f(that)g(there)g(must)g(be)g(an)g(end)g(tag)g
+(for)g(e)n(v)o(ery)396 1531 y(start)h(tag)f(in)h(the)f(entity)g(v)n
+(alue,)f(and)h(that)g(end)f(tags)i(without)e(corresponding)e(start)k
+(tags)f(are)g(not)g(allo)n(wed.)396 1777 y Fj(Example)479
+1924 y Fi(If)f(the)f(author)i(of)f(a)f(document)j(changes)f(sometimes,)
+f(it)f(is)g(w)o(orthwhile)h(to)g(set)f(up)i(a)e(general)i(entity)e
+(containing)i(the)f(names)479 2021 y(of)g(the)g(authors.)h(If)e(the)h
+(author)h(changes,)g(you)f(need)h(only)g(to)e(change)j(the)e
+(de\002nition)g(of)g(the)g(entity)-5 b(,)18 b(and)i(do)f(not)h(need)f
+(to)479 2118 y(check)h(all)f(occurrences)h(of)f(authors')h(names:)479
+2289 y Fh(<!ENTITY)43 b(authors)g("Gerd)f(Stolpmann">)479
+2469 y Fi(In)19 b(the)g(document)i(te)o(xt,)d(you)i(can)f(no)n(w)h
+(refer)e(to)h(the)g(author)h(names)f(by)h(writing)e Fh(&authors;)p
+Fi(.)479 2607 y Fe(Ille)m(gal:)h Fi(The)g(follo)n(wing)g(tw)o(o)g
+(entities)g(are)g(ille)o(gal)f(because)i(the)f(elements)g(in)g(the)g
+(de\002nition)g(do)g(not)h(nest)f(properly:)479 2778
+y Fh(<!ENTITY)43 b(lengthy-tag)i("<section)e(textcolor='white')j
+(background='graphi)q(c'>)q(">)479 2865 y(<!ENTITY)d(nonsense)165
+b("<a></b>">)396 3139 y Fv(Earlier)20 b(in)g(this)h(introduction)d(we)i
+(e)o(xplained)e(that)j(there)e(are)i(substitutes)f(for)g(reserv)o(ed)e
+(characters:)i(<,)g(>,)396 3247 y(&,)f(',)h(and)f
+(".)g(These)h(are)g(simply)g(prede\002ned)e(general)h(entities;)i
+(note)f(that)g(the)o(y)g(are)g(the)g(only)396 3355 y(prede\002ned)e
+(entities.)j(It)f(is)h(allo)n(wed)f(to)g(de\002ne)g(these)g(entities)h
+(again)e(as)i(long)e(as)i(the)f(meaning)f(is)i(unchanged.)-2
+3725 y Fp(1.2.6.)35 b(Notations)g(and)e(unpar)n(sed)i(entities)396
+3892 y Fv(Unparsed)19 b(entities)i(ha)n(v)o(e)e(a)i(foreign)d(format)i
+(and)f(can)h(thus)g(not)g(be)g(read)g(by)g(the)g(XML)g(parser)-5
+b(.)20 b(Unparsed)f(entities)396 4000 y(are)h(al)o(w)o(ays)h(e)o
+(xternal.)e(The)h(format)f(of)h(an)g(unparsed)e(entity)i(must)g(ha)n(v)
+o(e)g(been)f(declared,)g(such)h(a)h(format)e(is)i(called)f(a)396
+4108 y Fr(notation)p Fv(.)f(The)g(entity)h(can)g(then)g(be)g(declared)f
+(by)h(referring)e(to)i(this)h(notation.)e(As)i(unparsed)d(entities)j
+(do)f(not)396 4216 y(contain)f(XML)i(te)o(xt,)e(it)i(is)h(not)d
+(possible)h(to)h(include)e(them)h(directly)f(into)h(the)g(document;)e
+(you)i(can)g(only)f(declare)396 4324 y(attrib)n(utes)h(such)g(that)h
+(names)e(of)h(unparsed)f(entities)h(are)h(acceptable)e(v)n(alues.)396
+4474 y(As)i(you)f(can)g(see,)g(unparsed)f(entities)h(are)g(too)g
+(complicated)f(in)h(order)f(to)h(ha)n(v)o(e)g(an)o(y)f(purpose.)g(It)h
+(is)h(almost)f(al)o(w)o(ays)396 4581 y(better)g(to)g(simply)g(pass)h
+(the)f(name)g(of)g(the)g(data)g(\002le)h(as)g(normal)e(attrib)n(ute)g
+(v)n(alue,)h(and)f(let)i(the)f(application)f(recognize)396
+4689 y(and)h(process)g(the)g(foreign)e(format.)p Black
+3800 5278 a Fr(19)p Black eop
+%%Page: 20 20
+20 19 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black -2 597 a Fx(1.3.)39 b(A)g(complete)f(e)n(xample:)g(The)h
+Fd(readme)k Fx(DTD)396 777 y Fv(The)20 b(reason)g(for)f
+Fr(r)m(eadme)h Fv(w)o(as)h(that)f(I)g(often)g(wrote)g(tw)o(o)g(v)o
+(ersions)f(of)h(\002les)h(such)f(as)h(README)g(and)e(INST)-8
+b(ALL)396 885 y(which)20 b(e)o(xplain)f(aspects)h(of)g(a)h(distrib)n
+(uted)e(softw)o(are)h(archi)n(v)o(e;)f(one)g(v)o(ersion)g(w)o(as)i
+(ASCII-formatted,)d(the)i(other)g(w)o(as)396 993 y(written)g(in)h
+(HTML.)e(Maintaining)g(both)g(v)o(ersions)h(means)f(double)g(amount)g
+(of)h(w)o(ork,)f(and)h(changes)f(of)h(one)f(v)o(ersion)396
+1101 y(may)h(be)g(for)o(gotten)e(in)i(the)g(other)f(v)o(ersion.)g(T)-7
+b(o)20 b(impro)o(v)o(e)e(this)j(situation)e(I)i(in)m(v)o(ented)d(the)i
+Fr(r)m(eadme)g Fv(DTD)g(which)f(allo)n(ws)396 1209 y(me)h(to)h
+(maintain)e(only)h(one)f(source)h(written)g(as)g(XML)h(document,)d(and)
+h(to)i(generate)e(the)h(ASCII)g(and)g(the)g(HTML)396
+1317 y(v)o(ersion)f(from)g(it.)396 1466 y(In)h(this)h(section,)f(I)g(e)
+o(xplain)f(only)g(the)i(DTD.)f(The)f Fr(r)m(eadme)h Fv(DTD)h(is)g
+(contained)d(in)j(the)f(PXP)h(distrib)n(ution)e(together)396
+1574 y(with)i(the)f(tw)o(o)g(con)m(v)o(erters)e(to)j(produce)d(ASCII)i
+(and)g(HTML.)g(Another)e(section)i(of)g(this)h(manual)e(describes)h
+(the)396 1682 y(HTML)g(con)m(v)o(erter)-5 b(.)396 1831
+y(The)20 b(documents)f(ha)n(v)o(e)g(a)i(simple)f(structure:)f(There)h
+(are)g(up)g(to)g(three)g(le)n(v)o(els)g(of)g(nested)g(sections,)g
+(paragraphs,)d(item)396 1939 y(lists,)22 b(footnotes,)c(hyperlinks,)g
+(and)h(te)o(xt)h(emphasis.)g(The)g(outermost)f(element)g(has)i(usually)
+e(the)h(type)g Fq(readme)p Fv(,)g(it)h(is)396 2047 y(declared)e(by)396
+2228 y Fq(<!ELEMENT)44 b(readme)f(\(sect1+\)>)396 2325
+y(<!ATTLIST)h(readme)845 2422 y(title)g(CDATA)g(#REQUIRED>)396
+2613 y Fv(This)21 b(means)f(that)g(this)h(element)e(contains)h(one)f
+(or)h(more)f(sections)i(of)f(the)g(\002rst)h(le)n(v)o(el)f(\(element)f
+(type)h Fq(sect1)p Fv(\),)f(and)396 2721 y(that)i(the)f(element)f(has)i
+(a)f(required)f(attrib)n(ute)h Fq(title)f Fv(containing)g(character)g
+(data)h(\(CD)m(A)-9 b(T)h(A\).)19 b(Note)h(that)h Fq(readme)396
+2829 y Fv(elements)f(must)g(not)g(contain)f(te)o(xt)h(data.)396
+2978 y(The)g(three)g(le)n(v)o(els)g(of)g(sections)g(are)g(declared)f
+(as)i(follo)n(ws:)396 3158 y Fq(<!ELEMENT)44 b(sect1)g
+(\(title,\(sect2|p|ul\)+\)>)396 3352 y(<!ELEMENT)g(sect2)g
+(\(title,\(sect3|p|ul\)+\)>)396 3547 y(<!ELEMENT)g(sect3)g
+(\(title,\(p|ul\)+\)>)396 3738 y Fv(Ev)o(ery)19 b(section)h(has)g(a)h
+Fq(title)f Fv(element)g(as)g(\002rst)h(subelement.)e(After)h(the)g
+(title)h(an)f(arbitrary)f(b)n(ut)h(non-empty)396 3846
+y(sequence)f(of)h(inner)g(sections,)g(paragraphs)e(and)h(item)i(lists)g
+(follo)n(ws.)f(Note)g(that)g(the)g(inner)g(sections)g(must)g(belong)f
+(to)396 3954 y(the)h(ne)o(xt)g(higher)f(section)h(le)n(v)o(el;)g
+Fq(sect3)g Fv(elements)f(must)i(not)f(contain)f(inner)g(sections)h
+(because)g(there)g(is)h(no)e(ne)o(xt)396 4061 y(higher)g(le)n(v)o(el.)
+396 4211 y(Ob)o(viously)-5 b(,)18 b(all)j(three)f(declarations)e(allo)n
+(w)j(paragraphs)d(\()p Fq(p)p Fv(\))h(and)h(item)g(lists)i(\()p
+Fq(ul)p Fv(\).)e(The)f(de\002nition)g(can)h(be)396 4319
+y(simpli\002ed)g(at)h(this)g(point)e(by)h(using)f(a)i(parameter)e
+(entity:)396 4499 y Fq(<!ENTITY)44 b(\045)g(p.like)g("p|ul">)396
+4693 y(<!ELEMENT)g(sect1)g(\(title,\(sect2|\045p.like;\)+\)>)p
+Black 3800 5278 a Fr(20)p Black eop
+%%Page: 21 21
+21 20 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fq(<!ELEMENT)44 b(sect2)g
+(\(title,\(sect3|\045p.like;\)+\)>)396 773 y(<!ELEMENT)g(sect3)g
+(\(title,\(\045p.like;\)+\)>)396 964 y Fv(Here,)20 b(the)g(entity)g
+Fq(p.like)g Fv(is)h(nothing)e(b)n(ut)h(a)g(macro)g(abbre)n(viating)d
+(the)j(same)h(sequence)e(of)h(declarations;)f(if)h(ne)n(w)396
+1072 y(elements)f(on)h(the)f(same)h(le)n(v)o(el)f(as)h
+Fq(p)g Fv(and)f Fq(ul)h Fv(are)f(later)h(added,)e(it)i(is)h(suf)n
+(\002cient)e(only)f(to)i(change)e(the)i(entity)f(de\002nition.)396
+1180 y(Note)h(that)h(there)e(are)i(some)f(restrictions)f(on)h(the)g
+(usage)g(of)g(entities)h(in)f(this)h(conte)o(xt;)e(most)h(important,)e
+(entities)396 1288 y(containing)h(a)h(left)h(paranthesis)e(must)h(also)
+h(contain)e(the)h(corresponding)d(right)i(paranthesis.)396
+1437 y(Note)h(that)h(the)f(entity)g Fq(p.like)g Fv(is)h(a)f
+Fr(par)o(ameter)i Fv(entity)-5 b(,)19 b(i.e.)h(the)g(ENTITY)g
+(declaration)e(contains)i(a)g(percent)f(sign,)396 1545
+y(and)h(the)g(entity)g(is)h(referred)e(to)h(by)g Fq(\045p.like;)p
+Fv(.)f(This)h(kind)g(of)f(entity)h(must)h(be)f(used)g(to)g(abbre)n
+(viate)e(parts)j(of)f(the)396 1653 y(DTD;)g(the)g Fr(g)o(ener)o(al)f
+Fv(entities)h(declared)e(without)h(percent)g(sign)h(and)f(referred)f
+(to)i(as)g Fq(&name;)f Fv(are)h(not)f(allo)n(wed)g(in)h(this)396
+1761 y(conte)o(xt.)396 1911 y(The)g Fq(title)g Fv(element)g
+(speci\002es)g(the)h(title)f(of)g(the)h(section)f(in)g(which)g(it)g
+(occurs.)g(The)f(title)i(is)h(gi)n(v)o(en)c(as)j(character)396
+2019 y(data,)f(optionally)f(interspersed)f(with)j(line)f(breaks)g(\()p
+Fq(br)p Fv(\):)396 2199 y Fq(<!ELEMENT)44 b(title)g(\(#PCDATA|br\)*>)
+396 2390 y Fv(Compared)19 b(with)h(the)g Fq(title)g Fr(attrib)n(ute)g
+Fv(of)g(the)h Fq(readme)e Fv(element,)h(this)g(element)g(allo)n(ws)g
+(inner)g(markup)e(\(i.e.)i Fq(br)p Fv(\))396 2498 y(while)g(attrib)n
+(ute)g(v)n(alues)g(do)g(not:)g(It)g(is)h(an)g(error)e(if)h(an)g(attrib)
+n(ute)g(v)n(alue)g(contains)f(the)h(left)h(angle)e(brack)o(et)g
+Fm(<)i Fv(literally)396 2605 y(such)f(that)g(it)h(is)h(impossible)d(to)
+h(include)g(inner)f(elements.)396 2755 y(The)h(paragraph)e(element)h
+Fq(p)i Fv(has)f(a)h(structure)e(similar)i(to)f Fq(title)p
+Fv(,)g(b)n(ut)g(it)h(allo)n(ws)f(more)g(inner)f(elements:)396
+2935 y Fq(<!ENTITY)44 b(\045)g(text)h("br|code|em|footnote|a">)396
+3129 y(<!ELEMENT)f(p)g(\(#PCDATA|\045text;\)*>)396 3320
+y Fv(Line)20 b(breaks)g(do)f(not)h(ha)n(v)o(e)g(inner)f(structure,)g
+(so)i(the)o(y)e(are)h(declared)f(as)i(being)e(empty:)396
+3500 y Fq(<!ELEMENT)44 b(br)g(EMPTY>)396 3691 y Fv(This)21
+b(means)f(that)g(really)g(nothing)e(is)j(allo)n(wed)f(within)g
+Fq(br)p Fv(;)g(you)f(must)i(al)o(w)o(ays)f(write)h Fq(<br></br>)e
+Fv(or)h(abbre)n(viated)396 3799 y Fq(<br/>)p Fv(.)396
+3949 y(Code)g(samples)h(should)e(be)h(mark)o(ed)f(up)h(by)f(the)h
+Fq(code)h Fv(tag;)f(emphasized)f(te)o(xt)h(can)g(be)g(indicated)f(by)h
+Fq(em)p Fv(:)396 4129 y Fq(<!ELEMENT)44 b(code)g(\(#PCDATA\)>)396
+4323 y(<!ELEMENT)g(em)g(\(#PCDATA|\045text;\)*>)396 4514
+y Fv(That)20 b Fq(code)g Fv(elements)g(are)g(not)g(allo)n(wed)g(to)g
+(contain)f(further)g(markup)f(while)i Fq(em)h Fv(elements)f(do)g(is)h
+(a)f(design)g(decision)396 4622 y(by)g(the)g(author)f(of)h(the)g(DTD.)
+396 4772 y(Unordered)e(lists)k(simply)d(consists)i(of)f(one)g(or)g
+(more)f(list)i(items,)g(and)e(a)i(list)g(item)g(may)e(contain)g
+(paragraph-le)n(v)o(el)396 4879 y(material:)p Black 3800
+5278 a Fr(21)p Black eop
+%%Page: 22 22
+22 21 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 396 579 a Fq(<!ELEMENT)44 b(ul)g(\(li+\)>)396 773
+y(<!ELEMENT)g(li)g(\(\045p.like;\)*>)396 964 y Fv(F)o(ootnotes)19
+b(are)h(described)f(by)h(the)g(te)o(xt)g(of)g(the)g(note;)g(this)h(te)o
+(xt)f(may)g(contain)f(te)o(xt-le)n(v)o(el)g(markup.)f(There)h(is)i(no)
+396 1072 y(mechanism)e(to)i(describe)e(the)h(numbering)e(scheme)h(of)h
+(footnotes,)f(or)h(to)g(specify)g(ho)n(w)f(footnote)g(references)f(are)
+396 1180 y(printed.)396 1360 y Fq(<!ELEMENT)44 b(footnote)f
+(\(#PCDATA|\045text;\)*>)396 1551 y Fv(Hyperlinks)19
+b(are)h(written)g(as)h(in)f(HTML.)g(The)g(anchor)f(tag)h(contains)f
+(the)h(te)o(xt)g(describing)f(where)h(the)g(link)g(points)g(to,)396
+1659 y(and)g(the)g Fq(href)g Fv(attrib)n(ute)g(is)h(the)f(pointer)f
+(\(as)i(URL\).)f(There)f(is)j(no)d(w)o(ay)i(to)f(describe)f(locations)h
+(of)g("hash)g(marks".)f(If)396 1767 y(the)h(link)g(refers)g(to)g
+(another)f Fr(r)m(eadme)h Fv(document,)e(the)i(attrib)n(ute)g
+Fq(readmeref)f Fv(should)g(be)h(used)g(instead)g(of)g
+Fq(href)p Fv(.)396 1875 y(The)g(reason)g(is)h(that)f(the)g(con)m(v)o
+(erted)e(document)g(has)i(usually)g(a)h(dif)n(ferent)d(system)i
+(identi\002er)g(\(\002le)h(name\),)d(and)i(the)396 1983
+y(link)g(to)h(a)f(con)m(v)o(erted)e(document)g(must)i(be)g(con)m(v)o
+(erted,)e(too.)396 2163 y Fq(<!ELEMENT)44 b(a)g(\(#PCDATA\)*>)396
+2260 y(<!ATTLIST)g(a)845 2357 y(href)268 b(CDATA)44 b(#IMPLIED)845
+2454 y(readmeref)f(CDATA)h(#IMPLIED)396 2552 y(>)396
+2742 y Fv(Note)20 b(that)h(although)d(it)j(is)g(only)e(sensible)i(to)f
+(specify)g(one)f(of)h(the)g(tw)o(o)h(attrib)n(utes,)f(the)g(DTD)g(has)h
+(no)e(means)h(to)396 2850 y(e)o(xpress)g(this)g(restriction.)396
+3000 y(So)h(f)o(ar)f(the)g(DTD.)g(Finally)-5 b(,)19 b(here)h(is)h(a)g
+(document)d(for)i(it:)396 3180 y Fq(<?xml)44 b(version="1.0")f
+(encoding="ISO-8859-1"?>)396 3277 y(<!DOCTYPE)h(readme)f(SYSTEM)h
+("readme.dtd">)396 3374 y(<readme)g(title="How)f(to)i(use)f(the)g
+(readme)g(converters">)396 3471 y(<sect1>)486 3569 y
+(<title>Usage</title>)486 3666 y(<p>)576 3763 y(The)g(<em>readme</em>)e
+(converter)i(is)g(invoked)g(on)g(the)h(command)e(line)h(by:)486
+3860 y(</p>)486 3957 y(<p>)576 4054 y(<code>readme)e([)j(-text)f(|)h
+(-html)f(])g(input.xml</code>)486 4151 y(</p>)486 4248
+y(<p>)576 4346 y(Here)g(a)g(list)h(of)f(options:)486
+4443 y(</p>)486 4540 y(<ul>)576 4637 y(<li>)665 4734
+y(<p><code>-)396 4831 y(text</code>:)f(specifies)g(that)i(ASCII)f
+(output)f(should)h(be)h(produced</p>)p Black 3800 5278
+a Fr(22)p Black eop
+%%Page: 23 23
+23 22 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p
+Black 576 579 a Fq(</li>)576 676 y(<li>)665 773 y(<p><code>-)396
+870 y(html</code>:)43 b(specifies)g(that)i(HTML)f(output)g(should)f(be)
+i(produced</p>)576 967 y(</li>)486 1065 y(</ul>)486 1162
+y(<p>)576 1259 y(The)f(input)g(file)g(must)g(be)h(given)f(on)g(the)h
+(command)e(line.)h(The)h(converted)e(output)h(is)576
+1356 y(printed)f(to)i(<em>stdout</em>.)486 1453 y(</p>)396
+1550 y(</sect1>)396 1647 y(<sect1>)486 1745 y(<title>Author</title>)486
+1842 y(<p>)576 1939 y(The)f(program)g(has)g(been)g(written)g(by)576
+2036 y(<a)g(href="mailto:Gerd.Stolpmann@darmstadt.ne)o(tsurf.)o(de">Ge)
+o(rd)39 b(Stolpmann</a>.)486 2133 y(</p>)396 2230 y(</sect1>)396
+2327 y(</readme>)-2 2746 y Fx(Notes)p Black 396 2926
+a Fv(1.)p Black 70 w(This)20 b(construct)g(is)h(only)e(allo)n(wed)h
+(within)g(the)g(de\002nition)f(of)h(another)e(entity;)i(otherwise)g(e)o
+(xtra)f(spaces)i(w)o(ould)529 3034 y(be)f(added)f(\(as)i(e)o(xplained)d
+(abo)o(v)o(e\).)g(Such)i(indirection)e(is)j(not)f(recommended.)529
+3172 y Fi(Complete)f(e)o(xample:)529 3343 y Fh(<!ENTITY)43
+b(\045)e(variant)i("a">)243 b(<!-)42 b(or)f("b")g(->)529
+3430 y(<!ENTITY)i(text-a)g("This)f(is)f(text)h(A.">)529
+3518 y(<!ENTITY)h(text-b)g("This)f(is)f(text)h(B.">)529
+3605 y(<!ENTITY)h(text)f("&text-\045variant)q(;;")q(>)529
+3785 y Fi(Y)-8 b(ou)19 b(can)h(no)n(w)f(write)f Fh(&text;)j
+Fi(in)e(the)g(document)h(instance,)f(and)h(depending)h(on)e(the)g(v)n
+(alue)g(of)g Fh(variant)i Fi(either)e Fh(text-a)i Fi(or)529
+3882 y Fh(text-b)g Fi(is)d(inserted.)p Black 3800 5278
+a Fr(23)p Black eop
+%%Page: 24 24
+24 23 bop Black Black -2 621 a Fs(Chapter)48 b(2.)f(Using)i(PXP)-2
+1055 y Fx(2.1.)39 b(V)-9 b(alidation)396 1235 y Fv(The)20
+b(parser)g(can)g(be)g(used)g(to)g Fr(validate)f Fv(a)i(document.)d
+(This)i(means)g(that)g(all)h(the)f(constraints)g(that)g(must)g(hold)g
+(for)f(a)396 1343 y(v)n(alid)h(document)e(are)i(actually)g(check)o(ed.)
+f(V)-9 b(alidation)19 b(is)i(the)f(def)o(ault)f(mode)h(of)g(PXP,)g
+(i.e.)h(e)n(v)o(ery)d(document)h(is)396 1451 y(v)n(alidated)g(while)i
+(it)f(is)i(being)d(parsed.)396 1600 y(In)h(the)g Fq(examples)g
+Fv(directory)e(of)i(the)g(distrib)n(ution)f(you)h(\002nd)g(the)g
+Fq(pxpvalidate)f Fv(application.)f(It)j(is)g(in)m(v)n(ok)o(ed)d(in)j
+(the)396 1708 y(follo)n(wing)e(w)o(ay:)396 1888 y Fq(pxpvalidate)43
+b([)i(-wf)f(])h Fn(file)p Fq(...)396 2079 y Fv(The)20
+b(\002les)h(mentioned)e(on)g(the)i(command)d(line)i(are)g(v)n
+(alidated,)f(and)h(e)n(v)o(ery)e(w)o(arning)h(and)h(e)n(v)o(ery)f
+(error)g(messages)h(are)396 2187 y(printed)f(to)i(stderr)-5
+b(.)396 2337 y(The)20 b(-wf)g(switch)h(modi\002es)e(the)i(beha)n(viour)
+d(such)i(that)g(a)h(well-formedness)d(parser)h(is)i(simulated.)f(In)g
+(this)g(mode,)f(the)396 2445 y(ELEMENT)-6 b(,)19 b(A)-9
+b(TTLIST)j(,)19 b(and)g(NO)m(T)-8 b(A)f(TION)20 b(declarations)f(of)h
+(the)g(DTD)g(are)g(ignored,)e(and)i(only)f(the)i(ENTITY)396
+2553 y(declarations)e(will)i(tak)o(e)f(ef)n(fect.)g(This)g(mode)f(is)i
+(intended)e(for)h(documents)e(lacking)h(a)i(DTD.)f(Please)h(note)f
+(that)g(the)396 2661 y(parser)g(still)h(scans)g(the)f(DTD)g(fully)g
+(and)g(will)h(report)e(all)h(errors)g(in)g(the)g(DTD;)h(such)f(checks)f
+(are)h(not)g(required)f(by)g(a)396 2769 y(well-formedness)f(parser)-5
+b(.)396 2918 y(The)20 b Fq(pxpvalidate)f Fv(application)g(is)i(the)f
+(simplest)h(sensible)f(program)e(using)i(PXP,)g(you)g(may)f(consider)g
+(it)i(as)396 3026 y("hello)f(w)o(orld")f(program.)-2
+3445 y Fx(2.2.)39 b(Ho)n(w)g(to)g(par)n(se)f(a)i(document)d(fr)m(om)i
+(an)g(application)396 3624 y Fv(Let)21 b(me)f(\002rst)h(gi)n(v)o(e)e(a)
+i(rough)d(o)o(v)o(ervie)n(w)g(of)i(the)h(object)e(model)g(of)h(the)h
+(parser)-5 b(.)19 b(The)h(follo)n(wing)f(items)h(are)h(represented)396
+3732 y(by)f(objects:)p Black 396 4055 a Ft(\225)p Black
+60 w Fr(Documents:)f Fv(The)h(document)e(representation)g(is)j(more)e
+(or)h(less)h(the)f(anchor)f(for)g(the)h(application;)f(all)i(accesses)g
+(to)479 4163 y(the)f(parsed)g(entities)h(start)f(here.)g(It)g(is)h
+(described)e(by)h(the)g(class)h Fq(document)f Fv(contained)e(in)j(the)f
+(module)479 4271 y Fq(Pxp_document)p Fv(.)f(Y)-9 b(ou)19
+b(can)h(get)h(some)f(global)f(information,)e(such)j(as)h(the)f(XML)h
+(declaration)d(the)j(document)479 4379 y(be)o(gins)f(with,)g(the)g(DTD)
+g(of)g(the)g(document,)e(global)i(processing)e(instructions,)h(and)h
+(most)g(important,)f(the)479 4487 y(document)f(tree.)p
+Black 396 4595 a Ft(\225)p Black 60 w Fr(The)j(contents)e(of)h
+(documents:)f Fv(The)h(contents)f(ha)n(v)o(e)h(the)g(structure)f(of)h
+(a)h(tree:)f(Elements)g(contain)f(other)g(elements)479
+4703 y(and)h(te)o(xt)744 4670 y Ff(1)768 4703 y Fv(.)h(The)e(common)g
+(type)h(to)g(represent)f(both)g(kinds)h(of)g(content)f(is)i
+Fq(node)f Fv(which)g(is)h(a)g(class)g(type)e(that)479
+4811 y(uni\002es)h(the)h(properties)d(of)i(elements)g(and)g(character)f
+(data.)h(Ev)o(ery)e(node)i(has)g(a)h(list)g(of)f(children)f(\(which)g
+(is)i(empty)p Black 3800 5278 a Fr(24)p Black eop
+%%Page: 25 25
+25 24 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 479 579 a Fv(if)h(the)f(element)g(is)h(empty)e(or)h(the)g(node)f
+(represents)h(te)o(xt\);)f(nodes)h(may)g(ha)n(v)o(e)f(attrib)n(utes;)h
+(nodes)g(ha)n(v)o(e)f(al)o(w)o(ays)i(te)o(xt)479 687
+y(contents.)d(There)g(are)g(tw)o(o)h(implementations)e(of)h
+Fq(node)p Fv(,)h(the)f(class)i Fq(element_impl)d Fv(for)h(elements,)g
+(and)g(the)h(class)479 795 y Fq(data_impl)h Fv(for)f(te)o(xt)h(data.)g
+(Y)-9 b(ou)20 b(\002nd)f(these)i(classes)g(and)f(class)h(types)f(in)g
+(the)g(module)f Fq(Pxp_document)p Fv(,)g(too.)479 944
+y(Note)h(that)h(attrib)n(ute)f(lists)h(are)f(represented)f(by)g
+(non-class)h(v)n(alues.)p Black 396 1094 a Ft(\225)p
+Black 60 w Fr(The)h(node)e(e)n(xtension:)g Fv(F)o(or)h(adv)n(anced)e
+(usage,)i(e)n(v)o(ery)e(node)i(of)f(the)i(document)d(may)i(ha)n(v)o(e)f
+(an)h(associated)479 1202 y Fr(e)n(xtension)g Fv(which)g(is)h(simply)f
+(a)g(second)f(object.)h(This)g(object)g(must)g(ha)n(v)o(e)g(the)g
+(three)g(methods)f Fq(clone)p Fv(,)g Fq(node)p Fv(,)h(and)479
+1310 y Fq(set_node)f Fv(as)h(bare)f(minimum,)e(b)n(ut)j(you)e(are)i
+(free)e(to)i(add)f(methods)f(as)i(you)f(w)o(ant.)g(This)g(is)i(the)e
+(preferred)e(w)o(ay)j(to)479 1417 y(add)g(functionality)e(to)i(the)h
+(document)d(tree)1746 1384 y Ff(2)1770 1417 y Fv(.)j(The)e(class)j
+(type)d Fq(extension)h Fv(is)h(de\002ned)e(in)h Fq(Pxp_document)p
+Fv(,)f(too.)p Black 396 1525 a Ft(\225)p Black 60 w Fr(The)i(DTD:)f
+Fv(Sometimes)g(it)h(is)g(necessary)e(to)i(access)f(the)h(DTD)f(of)g(a)h
+(document;)d(the)i(a)n(v)o(erage)f(application)g(does)479
+1633 y(not)h(need)g(this)g(feature.)f(The)h(class)h Fq(dtd)g
+Fv(describes)e(DTDs,)i(and)e(mak)o(es)h(it)h(possible)f(to)h(get)f
+(representations)e(of)479 1741 y(element,)i(entity)-5
+b(,)19 b(and)h(notation)e(declarations)h(as)i(well)g(as)g(processing)e
+(instructions)g(contained)f(in)j(the)f(DTD.)479 1849
+y(This)g(class,)g(and)f Fq(dtd_element)p Fv(,)g Fq(dtd_notation)p
+Fv(,)e(and)i Fq(proc_instruction)f Fv(can)h(be)h(found)e(in)i(the)f
+(module)479 1957 y Fq(Pxp_dtd)p Fv(.)h(There)f(are)h(a)h(couple)e(of)h
+(classes)h(representing)d(dif)n(ferent)h(kinds)g(of)h(entities;)h
+(these)f(can)g(be)g(found)f(in)479 2065 y(the)h(module)f
+Fq(Pxp_entity)p Fv(.)396 2214 y(Additionally)-5 b(,)18
+b(the)i(follo)n(wing)f(modules)g(play)h(a)g(role:)p Black
+396 2447 a Ft(\225)p Black 60 w Fr(Pxp_yacc:)e Fv(Here)i(the)h(main)e
+(parsing)h(functions)e(such)i(as)h Fq(parse_document_entity)c
+Fv(are)k(located.)e(Some)479 2555 y(additional)g(types)h(and)g
+(functions)f(allo)n(w)h(the)g(parser)f(to)i(be)f(con\002gured)e(in)i(a)
+h(non-standard)c(w)o(ay)-5 b(.)p Black 396 2663 a Ft(\225)p
+Black 60 w Fr(Pxp_types:)19 b Fv(This)h(is)h(a)g(collection)e(of)h
+(basic)g(types)g(and)g(e)o(xceptions.)396 2812 y(There)g(are)g(some)g
+(further)e(modules)i(that)g(are)g(needed)f(internally)g(b)n(ut)h(are)g
+(not)g(part)g(of)g(the)g(API.)396 2962 y(Let)h(the)f(document)e(to)i
+(be)h(parsed)e(be)h(stored)g(in)g(a)h(\002le)g(called)f
+Fq(doc.xml)p Fv(.)f(The)h(parsing)f(process)h(is)h(started)f(by)396
+3070 y(calling)g(the)g(function)396 3250 y Fq(val)45
+b(parse_document_entity)c(:)k(config)e(->)i(source)f(->)g('ext)g(spec)h
+(->)f('ext)g(document)396 3441 y Fv(de\002ned)19 b(in)i(the)f(module)f
+Fq(Pxp_yacc)p Fv(.)g(The)h(\002rst)h(ar)o(gument)d(speci\002es)i(some)g
+(global)g(properties)e(of)i(the)g(parser;)g(it)h(is)396
+3549 y(recommended)c(to)j(start)g(with)g(the)g Fq(default_config)p
+Fv(.)e(The)h(second)g(ar)o(gument)e(determines)i(where)g(the)h
+(document)396 3657 y(to)h(be)f(parsed)f(comes)h(from;)f(this)i(may)f
+(be)g(a)g(\002le,)h(a)g(channel,)d(or)i(an)g(entity)g(ID.)g(T)-7
+b(o)21 b(parse)f Fq(doc.xml)p Fv(,)f(it)i(is)g(suf)n(\002cient)396
+3764 y(to)g(pass)f Fq(from_file)44 b("doc.xml")p Fv(.)396
+3914 y(The)20 b(third)g(ar)o(gument)e(passes)i(the)h(object)e
+(speci\002cation)h(to)g(use.)g(Roughly)f(speaking,)g(it)i(determines)e
+(which)g(classes)396 4022 y(implement)g(the)h(node)g(objects)f(of)h
+(which)g(element)g(types,)f(and)h(which)g(e)o(xtensions)f(are)h(to)g
+(be)g(used.)g(The)g Fq('ext)396 4130 y Fv(polymorphic)d(v)n(ariable)i
+(is)j(the)e(type)f(of)h(the)h(e)o(xtension.)d(F)o(or)i(the)g(moment,)f
+(let)i(us)f(simply)g(pass)h Fq(default_spec)d Fv(as)396
+4238 y(this)j(ar)o(gument,)d(and)h(ignore)g(it.)396 4387
+y(So)i(the)f(follo)n(wing)e(e)o(xpression)h(parses)h
+Fq(doc.xml)p Fv(:)396 4567 y Fq(open)44 b(Pxp_yacc)396
+4664 y(let)h(d)f(=)h(parse_document_entity)c(default_config)i
+(\(from_file)g("doc.xml"\))g(de-)396 4762 y(fault_spec)p
+Black 3800 5278 a Fr(25)p Black eop
+%%Page: 26 26
+26 25 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(Note)g(that)h Fq(default_config)d
+Fv(implies)i(that)h(w)o(arnings)e(are)h(collected)g(b)n(ut)g(not)g
+(printed.)e(Errors)h(raise)i(one)f(of)g(the)396 687 y(e)o(xception)f
+(de\002ned)g(in)h Fq(Pxp_types)p Fv(;)f(to)i(get)f(readable)f(errors)g
+(and)h(w)o(arnings)f(catch)h(the)g(e)o(xceptions)f(as)i(follo)n(ws:)396
+867 y Fq(class)44 b(warner)g(=)486 964 y(object)576 1061
+y(method)f(warn)i(w)f(=)665 1158 y(print_endline)f(\("WARNING:)g(")i(^)
+f(w\))486 1256 y(end)396 1353 y(;;)396 1547 y(try)486
+1644 y(let)g(config)g(=)h({)f(default_config)f(with)h(warner)g(=)h(new)
+f(warner)g(})g(in)486 1741 y(let)g(d)h(=)g(parse_document_entity)c
+(config)j(\(from_file)f("doc.xml"\))g(default_spec)486
+1838 y(in)576 1935 y(...)396 2033 y(with)531 2130 y(e)h(->)620
+2227 y(print_endline)f(\(Pxp_types.string_of_exn)e(e\))396
+2418 y Fv(No)n(w)20 b Fq(d)h Fv(is)g(an)f(object)g(of)g(the)g
+Fq(document)f Fv(class.)i(If)f(you)g(w)o(ant)g(the)g(node)f(tree,)h
+(you)g(can)g(get)g(the)g(root)f(element)h(by)396 2598
+y Fq(let)45 b(root)f(=)g(d)h(#)g(root)396 2789 y Fv(and)20
+b(if)g(you)g(w)o(ould)f(rather)h(lik)o(e)g(to)g(access)h(the)f(DTD,)g
+(determine)f(it)i(by)396 2969 y Fq(let)45 b(dtd)f(=)h(d)f(#)h(dtd)396
+3160 y Fv(As)21 b(it)g(is)g(more)f(interesting,)f(let)h(us)h(in)m(v)o
+(estigate)e(the)h(node)f(tree)h(no)n(w)-5 b(.)19 b(Gi)n(v)o(en)g(the)i
+(root)e(element,)g(it)i(is)h(possible)d(to)396 3268 y(recursi)n(v)o
+(ely)f(tra)n(v)o(erse)h(the)h(whole)f(tree.)g(The)g(children)g(of)g(a)h
+(node)f Fq(n)h Fv(are)f(returned)f(by)h(the)h(method)e
+Fq(sub_nodes)p Fv(,)g(and)396 3376 y(the)i(type)g(of)g(a)h(node)e(is)i
+(returned)d(by)i Fq(node_type)p Fv(.)f(This)i(function)d(tra)n(v)o
+(erses)i(the)g(tree,)g(and)g(prints)g(the)g(type)f(of)h(each)396
+3484 y(node:)396 3664 y Fq(let)45 b(rec)f(print_structure)e(n)j(=)486
+3761 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 3858
+y(match)g(ntype)g(with)576 3955 y(T_element)f(name)h(->)665
+4053 y(print_endline)f(\("Element)g(of)i(type)f(")h(^)f(name\);)665
+4150 y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665
+4247 y(List.iter)h(print_structure)e(children)486 4344
+y(|)j(T_data)e(->)665 4441 y(print_endline)g("Data")486
+4538 y(|)i(_)f(->)665 4635 y(\(*)h(Other)f(node)g(types)g(are)g(not)h
+(possible)e(unless)h(the)g(parser)g(is)h(configured)800
+4733 y(differently.)710 4830 y(*\))p Black 3798 5278
+a Fr(26)p Black eop
+%%Page: 27 27
+27 26 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 665 579 a Fq(assert)44 b(false)396 770 y Fv(Y)-9
+b(ou)20 b(can)g(call)g(this)h(function)e(by)396 950 y
+Fq(print_structure)43 b(root)396 1141 y Fv(The)20 b(type)g(returned)e
+(by)i Fq(node_type)f Fv(is)i(either)f Fq(T_element)43
+b(name)21 b Fv(or)e Fq(T_data)p Fv(.)h(The)g Fq(name)g
+Fv(of)g(the)g(element)g(type)396 1249 y(is)h(the)g(string)e(included)g
+(in)i(the)f(angle)f(brack)o(ets.)h(Note)g(that)g(only)f(elements)h(ha)n
+(v)o(e)g(children;)f(data)h(nodes)f(are)h(al)o(w)o(ays)396
+1357 y(lea)n(v)o(es)h(of)e(the)i(tree.)396 1506 y(There)f(are)g(some)g
+(more)f(methods)g(in)i(order)e(to)h(access)h(a)f(parsed)g(node)f(tree:)
+p Black 396 1739 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(parent)p
+Fv(:)19 b(Returns)h(the)h(parent)e(node,)g(or)h(raises)h
+Fq(Not_found)e Fv(if)h(the)g(node)g(is)h(already)e(the)h(root)p
+Black 396 1847 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(root)p
+Fv(:)20 b(Returns)g(the)g(root)g(of)f(the)i(node)e(tree.)p
+Black 396 1955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(attribute)e(a)p
+Fv(:)21 b(Returns)f(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(with)h
+(name)e Fq(a)p Fv(.)i(The)e(method)g(returns)h(a)g(v)n(alue)g(for)479
+2063 y(e)n(v)o(ery)f Fr(declar)m(ed)j Fv(attrib)n(ute,)d(independently)
+e(of)j(whether)f(the)i(attrib)n(ute)e(instance)h(is)h(de\002ned)e(or)h
+(not.)g(If)g(the)479 2170 y(attrib)n(ute)g(is)h(not)f(declared,)f
+Fq(Not_found)g Fv(will)i(be)f(raised.)g(\(In)f(well-formedness)f(mode,)
+h(e)n(v)o(ery)g(attrib)n(ute)h(is)479 2278 y(considered)f(as)i(being)e
+(implicitly)h(declared)e(with)j(type)f Fq(CDATA)p Fv(.\))479
+2428 y(The)g(follo)n(wing)f(return)g(v)n(alues)g(are)i(possible:)f
+Fq(Value)44 b(s)p Fv(,)20 b Fq(Valuelist)43 b(sl)21 b
+Fv(,)f(and)g Fq(Implied_value)p Fv(.)e(The)i(\002rst)479
+2536 y(tw)o(o)h(v)n(alue)e(types)h(indicate)g(that)g(the)g(attrib)n
+(ute)g(v)n(alue)g(is)h(a)n(v)n(ailable,)e(either)h(because)g(there)f
+(is)i(a)g(de\002nition)479 2644 y Fn(a)p Fq(=")p Fn(value)p
+Fq(")f Fv(in)g(the)g(XML)g(te)o(xt,)g(or)g(because)g(there)f(is)i(a)g
+(def)o(ault)e(v)n(alue)h(\(declared)f(in)h(the)g(DTD\).)g(Only)g(if)g
+(both)479 2752 y(the)g(instance)g(de\002nition)f(and)h(the)g(def)o
+(ault)g(declaration)e(are)i(missing,)g(the)h(latter)f(v)n(alue)f
+Fq(Implied_value)g Fv(will)479 2860 y(be)h(returned.)479
+3009 y(In)g(the)g(DTD,)h(e)n(v)o(ery)d(attrib)n(ute)i(is)h(typed.)e
+(There)h(are)g(single-v)n(alue)e(types)i(\(CD)m(A)-9
+b(T)h(A,)20 b(ID,)g(IDREF)-7 b(,)21 b(ENTITY)-11 b(,)479
+3117 y(NMT)o(OKEN,)19 b(enumerations\),)f(in)i(which)g(case)g(the)h
+(method)d(passes)j Fq(Value)44 b(s)21 b Fv(back,)e(where)h
+Fq(s)g Fv(is)h(the)479 3225 y(normalized)e(string)g(v)n(alue)h(of)g
+(the)g(attrib)n(ute.)g(The)f(other)h(types)g(\(IDREFS,)g(ENTITIES,)f
+(NMT)o(OKENS\))479 3333 y(represent)g(list)j(v)n(alues,)d(and)h(the)g
+(parser)g(splits)h(the)f(XML)g(literal)h(into)e(se)n(v)o(eral)h(tok)o
+(ens)g(and)f(returns)h(these)g(tok)o(ens)479 3441 y(as)h
+Fq(Valuelist)44 b(sl)p Fv(.)479 3590 y(Normalization)19
+b(means)h(that)g(entity)g(references)e(\(the)i Fq(&)p
+Fn(name)p Fq(;)g Fv(tok)o(ens\))f(and)h(character)f(references)479
+3698 y(\()p Fq(&#)p Fn(number)s Fq(;)p Fv(\))g(are)h(replaced)f(by)g
+(the)i(te)o(xt)f(the)o(y)f(represent,)g(and)h(that)g(white)g(space)g
+(characters)f(are)i(con)m(v)o(erted)479 3806 y(into)f(plain)g(spaces.)p
+Black 396 3955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(data)p
+Fv(:)20 b(Returns)g(the)g(character)f(data)h(contained)f(in)h(the)g
+(node.)f(F)o(or)h(data)g(nodes,)f(the)h(meaning)f(is)i(ob)o(vious)479
+4063 y(as)g(this)g(is)g(the)f(main)g(content)f(of)h(data)g(nodes.)f(F)o
+(or)h(element)g(nodes,)f(this)i(method)e(returns)g(the)h(concatenated)
+479 4171 y(contents)g(of)g(all)g(inner)g(data)g(nodes.)479
+4321 y(Note)g(that)h(entity)f(references)e(included)h(in)h(the)h(te)o
+(xt)f(are)g(resolv)o(ed)f(while)h(the)o(y)f(are)h(being)g(parsed;)f
+(for)h(e)o(xample)479 4429 y(the)g(te)o(xt)h("a)f(<>)g(b")g(will)
+h(be)f(returned)e(as)j("a)g(<>)f(b")g(by)g(this)h(method.)d(Spaces)j
+(of)f(data)g(nodes)f(are)h(al)o(w)o(ays)479 4537 y(preserv)o(ed.)e(Ne)n
+(wlines)j(are)f(preserv)o(ed,)e(b)n(ut)i(al)o(w)o(ays)g(con)m(v)o
+(erted)e(to)i(\\n)h(characters)e(e)n(v)o(en)g(if)i(ne)n(wlines)e(are)i
+(encoded)479 4644 y(as)g(\\r\\n)f(or)g(\\r)-5 b(.)21
+b(Normally)e(you)g(will)i(ne)n(v)o(er)e(see)i(tw)o(o)f(adjacent)f(data)
+i(nodes)e(because)h(the)g(parser)f(collapses)h(all)h(data)479
+4752 y(material)f(at)h(one)e(location)h(into)g(one)f(node.)g(\(Ho)n(we)
+n(v)o(er)m(,)f(if)i(you)g(create)g(your)f(o)n(wn)g(tree)h(or)g
+(transform)f(the)h(parsed)479 4860 y(tree,)g(it)h(is)g(possible)f(to)h
+(ha)n(v)o(e)e(adjacent)h(data)g(nodes.\))p Black 3797
+5278 a Fr(27)p Black eop
+%%Page: 28 28
+28 27 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 479 579 a Fv(Note)g(that)h(elements)f(that)g(do)g
+Fr(not)h Fv(allo)n(w)f(#PCD)m(A)-9 b(T)h(A)20 b(as)h(content)e(will)i
+(not)f(ha)n(v)o(e)g(data)g(nodes)f(as)i(children.)e(This)479
+687 y(means)h(that)g(spaces)h(and)f(ne)n(wlines,)f(the)h(only)g
+(character)f(material)g(allo)n(wed)h(for)g(such)f(elements,)h(are)g
+(silently)479 795 y(dropped.)396 986 y(F)o(or)g(e)o(xample,)e(if)i(the)
+f(task)h(is)h(to)f(print)f(all)h(contents)f(of)g(elements)h(with)f
+(type)h("v)n(aluable")e(whose)h(attrib)n(ute)g("priority")396
+1094 y(is)i("1",)f(this)h(function)d(can)i(help:)396
+1274 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
+1371 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 1468
+y(match)g(ntype)g(with)576 1565 y(T_element)f("valuable")g(when)h(n)h
+(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
+1662 y(print_endline)f("Valuable)g(node)h(with)h(priotity)e(1)i
+(found:";)665 1759 y(print_endline)e(\(n)h(#)h(data\))486
+1857 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 1954
+y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665 2051
+y(List.iter)h(print_valuable_prio1)d(children)486 2148
+y(|)k(_)f(->)665 2245 y(assert)g(false)396 2436 y Fv(Y)-9
+b(ou)20 b(can)g(call)g(this)h(function)e(by:)396 2616
+y Fq(print_valuable_prio1)42 b(root)396 2807 y Fv(If)20
+b(you)g(lik)o(e)g(a)h(DSSSL-lik)o(e)f(style,)g(you)g(can)g(mak)o(e)f
+(the)h(function)f Fq(process_children)f Fv(e)o(xplicit:)396
+2987 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486
+3182 y(let)f(process_children)e(n)j(=)576 3279 y(let)f(children)f(=)i
+(n)g(#)f(sub_nodes)g(in)576 3376 y(List.iter)f(print_valuable_prio1)e
+(children)486 3473 y(in)486 3667 y(let)j(ntype)g(=)h(n)g(#)f(node_type)
+g(in)486 3764 y(match)g(ntype)g(with)576 3862 y(T_element)f("valuable")
+g(when)h(n)h(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665
+3959 y(print_endline)f("Valuable)g(node)h(with)h(priority)e(1)i
+(found:";)665 4056 y(print_endline)e(\(n)h(#)h(data\))486
+4153 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 4250
+y(process_children)e(n)486 4347 y(|)j(_)f(->)665 4444
+y(assert)g(false)396 4635 y Fv(So)21 b(f)o(ar)m(,)e(O'Caml)h(is)i(no)n
+(w)d(a)i(simple)f("style-sheet)g(language":)e(Y)-9 b(ou)20
+b(can)g(form)f(a)h(big)g("match")g(e)o(xpression)e(to)396
+4743 y(distinguish)h(between)h(all)h(signi\002cant)e(cases,)i(and)f
+(pro)o(vide)e(dif)n(ferent)g(reactions)i(on)g(dif)n(ferent)e
+(conditions.)h(But)h(this)396 4851 y(technique)f(has)h(limitations;)g
+(the)h("match")e(e)o(xpression)g(tends)h(to)g(get)g(lar)o(ger)f(and)h
+(lar)o(ger)m(,)e(and)i(it)g(is)i(dif)n(\002cult)d(to)i(store)p
+Black 3800 5278 a Fr(28)p Black eop
+%%Page: 29 29
+29 28 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(intermediate)f(v)n(alues)h(as)h(there)e(is)j(only)d
+(one)h(big)f(recursion.)g(Alternati)n(v)o(ely)-5 b(,)18
+b(it)j(is)g(also)f(possible)g(to)h(represent)e(the)396
+687 y(v)n(arious)g(cases)i(as)g(classes,)g(and)f(to)g(use)h(dynamic)d
+(method)h(lookup)g(to)h(\002nd)g(the)g(appropiate)e(class.)j(The)f(ne)o
+(xt)f(section)396 795 y(e)o(xplains)g(this)i(technique)e(in)h(detail.)
+-2 1213 y Fx(2.3.)39 b(Class-based)e(pr)m(ocessing)g(of)j(the)f(node)f
+(tree)396 1393 y Fv(By)21 b(def)o(ault,)e(the)h(parsed)g(node)f(tree)h
+(consists)h(of)f(objects)g(of)g(the)g(same)g(class;)h(this)g(is)g(a)g
+(good)e(design)g(as)i(long)e(as)i(you)396 1501 y(w)o(ant)g(only)e(to)h
+(access)h(selected)f(parts)g(of)g(the)h(document.)c(F)o(or)j(comple)o
+(x)f(transformations,)e(it)k(may)f(be)g(better)g(to)g(use)396
+1609 y(dif)n(ferent)f(classes)i(for)f(objects)g(describing)e(dif)n
+(ferent)h(element)g(types.)396 1758 y(F)o(or)h(e)o(xample,)f(if)h(the)g
+(DTD)h(declares)e(the)i(element)e(types)h Fq(a)p Fv(,)h
+Fq(b)p Fv(,)f(and)g Fq(c)p Fv(,)g(and)g(if)g(the)g(task)h(is)g(to)f
+(con)m(v)o(ert)e(an)j(arbitrary)396 1866 y(document)d(into)i(a)h
+(printable)e(format,)g(the)h(idea)g(is)h(to)f(de\002ne)g(for)g(e)n(v)o
+(ery)f(element)g(type)h(a)g(separate)g(class)h(that)g(has)f(a)396
+1974 y(method)f Fq(print)p Fv(.)h(The)g(classes)h(are)f
+Fq(eltype_a)p Fv(,)f Fq(eltype_b)p Fv(,)g(and)h Fq(eltype_c)p
+Fv(,)f(and)h(e)n(v)o(ery)f(class)i(implements)396 2082
+y Fq(print)f Fv(such)g(that)g(elements)g(of)g(the)g(type)g
+(corresponding)d(to)j(the)g(class)i(are)e(con)m(v)o(erted)d(to)k(the)f
+(output)f(format.)396 2232 y(The)h(parser)g(supports)f(such)h(a)g
+(design)g(directly)-5 b(.)19 b(As)i(it)g(is)g(impossible)e(to)i(deri)n
+(v)o(e)d(recursi)n(v)o(e)h(classes)i(in)g(O'Caml)3703
+2198 y Ff(3)3727 2232 y Fv(,)g(the)396 2340 y(specialized)f(element)f
+(classes)j(cannot)d(be)h(formed)f(by)g(simply)h(inheriting)f(from)g
+(the)h(b)n(uilt-in)g(classes)h(of)f(the)g(parser)396
+2447 y(and)g(adding)f(methods)g(for)g(customized)g(functionality)-5
+b(.)18 b(T)-7 b(o)20 b(get)g(around)f(this)h(limitation,)g(e)n(v)o(ery)
+f(node)g(of)h(the)396 2555 y(document)e(tree)j(is)g(represented)d(by)i
+Fr(two)h Fv(objects,)e(one)h(called)g("the)g(node")f(and)h(containing)e
+(the)i(recursi)n(v)o(e)396 2663 y(de\002nition)f(of)h(the)g(tree,)g
+(one)g(called)g("the)g(e)o(xtension".)e(Ev)o(ery)h(node)g(object)h(has)
+g(a)h(reference)e(to)h(the)g(e)o(xtension,)f(and)396
+2771 y(the)h(e)o(xtension)f(has)i(a)f(reference)f(to)h(the)g(node.)f
+(The)h(adv)n(antage)e(of)i(this)h(model)e(is)i(that)g(it)g(is)g(no)n(w)
+e(possible)h(to)396 2879 y(customize)g(the)g(e)o(xtension)f(without)g
+(af)n(fecting)g(the)h(typing)f(constraints)g(of)h(the)h(recursi)n(v)o
+(e)d(node)h(de\002nition.)396 3029 y(Ev)o(ery)g(e)o(xtension)g(must)h
+(ha)n(v)o(e)g(the)g(three)g(methods)f Fq(clone)p Fv(,)g
+Fq(node)p Fv(,)h(and)g Fq(set_node)p Fv(.)f(The)h(method)f
+Fq(clone)h Fv(creates)396 3137 y(a)h(deep)e(cop)o(y)h(of)g(the)g(e)o
+(xtension)f(object)g(and)h(returns)f(it;)i Fq(node)f
+Fv(returns)g(the)g(node)f(object)h(for)f(this)i(e)o(xtension)e(object;)
+396 3244 y(and)h Fq(set_node)f Fv(is)i(used)f(to)h(tell)g(the)f(e)o
+(xtension)f(object)g(which)h(node)f(is)i(associated)f(with)g(it,)h
+(this)g(method)e(is)396 3352 y(automatically)g(called)h(when)g(the)g
+(node)f(tree)h(is)h(initialized.)f(The)g(follo)n(wing)e(de\002nition)h
+(is)i(a)g(good)e(starting)h(point)396 3460 y(for)g(these)g(methods;)f
+(usually)h Fq(clone)g Fv(must)g(be)g(further)f(re\002ned)g(when)h
+(instance)g(v)n(ariables)f(are)h(added)f(to)h(the)h(class:)396
+3640 y Fq(class)44 b(custom_extension)e(=)486 3738 y(object)i(\(self\))
+576 3932 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(custom_extension)d
+(node)i(option\))576 4126 y(method)f(clone)h(=)h({<)g(>})576
+4223 y(method)e(node)i(=)665 4320 y(match)f(node)g(with)845
+4418 y(None)g(->)934 4515 y(assert)g(false)755 4612 y(|)h(Some)f(n)g
+(->)h(n)576 4709 y(method)e(set_node)h(n)h(=)665 4806
+y(node)f(<-)h(Some)f(n)p Black 3800 5278 a Fr(29)p Black
+eop
+%%Page: 30 30
+30 29 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 676 a Fq(end)396 867 y Fv(This)h(part)e(of)h(the)h(e)o
+(xtension)d(is)j(usually)f(the)g(same)h(for)e(all)i(classes,)g(so)g(it)
+g(is)g(a)f(good)f(idea)h(to)g(consider)396 975 y Fq(custom_extension)e
+Fv(as)j(the)f(super)n(-class)g(of)g(the)h(further)d(class)j
+(de\002nitions.)e(Continuining)f(the)j(e)o(xample)d(of)396
+1083 y(abo)o(v)o(e,)h(we)h(can)g(de\002ne)g(the)g(element)g(type)f
+(classes)j(as)e(follo)n(ws:)396 1263 y Fq(class)44 b(virtual)g
+(custom_extension)e(=)486 1360 y(object)i(\(self\))576
+1457 y(...)g(clone,)g(node,)g(set_node)f(defined)h(as)g(above)g(...)576
+1652 y(method)f(virtual)h(print)g(:)h(out_channel)e(->)h(unit)486
+1749 y(end)396 1943 y(class)g(eltype_a)g(=)486 2040 y(object)g
+(\(self\))576 2137 y(inherit)f(custom_extension)576 2234
+y(method)g(print)h(ch)h(=)g(...)486 2332 y(end)396 2526
+y(class)f(eltype_b)g(=)486 2623 y(object)g(\(self\))576
+2720 y(inherit)f(custom_extension)576 2817 y(method)g(print)h(ch)h(=)g
+(...)486 2914 y(end)396 3109 y(class)f(eltype_c)g(=)486
+3206 y(object)g(\(self\))576 3303 y(inherit)f(custom_extension)576
+3400 y(method)g(print)h(ch)h(=)g(...)486 3497 y(end)396
+3688 y Fv(The)20 b(method)f Fq(print)h Fv(can)g(no)n(w)f(be)i
+(implemented)d(for)h(e)n(v)o(ery)g(element)h(type)g(separately)-5
+b(.)18 b(Note)i(that)h(you)e(get)h(the)396 3796 y(associated)g(node)f
+(by)h(in)m(v)n(oking)396 3976 y Fq(self)44 b(#)h(node)396
+4167 y Fv(and)20 b(you)f(get)h(the)h(e)o(xtension)d(object)i(of)g(a)h
+(node)e Fq(n)h Fv(by)g(writing)396 4347 y Fq(n)45 b(#)g(extension)396
+4538 y Fv(It)21 b(is)g(guaranteed)d(that)396 4718 y Fq(self)44
+b(#)h(node)f(#)h(extension)e(==)i(self)p Black 3800 5278
+a Fr(30)p Black eop
+%%Page: 31 31
+31 30 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(al)o(w)o(ays)h(holds.)396 728 y(Here)f(are)g(sample)
+g(de\002nitions)g(of)g(the)g Fq(print)g Fv(methods:)396
+909 y Fq(class)44 b(eltype_a)g(=)486 1006 y(object)g(\(self\))576
+1103 y(inherit)f(custom_extension)576 1200 y(method)g(print)h(ch)h(=)
+665 1297 y(\(*)g(Nodes)f(<a>...</a>)f(are)h(only)g(containers:)f(*\))
+665 1394 y(output_string)g(ch)h("\(";)665 1491 y(List.iter)755
+1588 y(\(fun)g(n)h(->)f(n)h(#)g(extension)e(#)i(print)f(ch\))755
+1686 y(\(self)g(#)h(node)f(#)g(sub_nodes\);)665 1783
+y(output_string)f(ch)h("\)";)486 1880 y(end)396 2074
+y(class)g(eltype_b)g(=)486 2171 y(object)g(\(self\))576
+2268 y(inherit)f(custom_extension)576 2366 y(method)g(print)h(ch)h(=)
+665 2463 y(\(*)g(Print)f(the)g(value)g(of)h(the)f(CDATA)g(attribute)f
+("print":)h(*\))665 2560 y(match)g(self)g(#)h(node)f(#)h(attribute)e
+("print")h(with)755 2657 y(Value)g(s)314 b(->)44 b(output_string)f(ch)h
+(s)665 2754 y(|)h(Implied_value)e(->)h(output_string)f(ch)h
+("<missing>")665 2851 y(|)h(Valuelist)e(l)135 b(->)44
+b(assert)g(false)1517 2948 y(\(*)h(not)f(possible)f(because)h(the)g
+(att)h(is)f(CDATA)g(*\))486 3045 y(end)396 3240 y(class)g(eltype_c)g(=)
+486 3337 y(object)g(\(self\))576 3434 y(inherit)f(custom_extension)576
+3531 y(method)g(print)h(ch)h(=)665 3628 y(\(*)g(Print)f(the)g(contents)
+g(of)g(this)g(element:)g(*\))665 3725 y(output_string)f(ch)h(\(self)g
+(#)h(node)f(#)h(data\))486 3823 y(end)396 4017 y(class)f
+(null_extension)f(=)486 4114 y(object)h(\(self\))576
+4211 y(inherit)f(custom_extension)576 4308 y(method)g(print)h(ch)h(=)g
+(assert)e(false)486 4405 y(end)396 4638 y Fv(The)20 b(remaining)f(task)
+h(is)h(to)g(con\002gure)d(the)i(parser)g(such)g(that)g(these)g(e)o
+(xtension)f(classes)i(are)f(actually)g(used.)g(Here)396
+4746 y(another)f(problem)f(arises:)j(It)g(is)g(not)f(possible)g(to)g
+(dynamically)e(select)j(the)f(class)h(of)f(an)g(object)g(to)g(be)h
+(created.)e(As)396 4854 y(w)o(orkaround,)e(PXP)k(allo)n(ws)g(the)f
+(user)g(to)g(specify)g Fr(e)n(xemplar)g(objects)g Fv(for)f(the)h(v)n
+(arious)g(element)f(types;)h(instead)g(of)p Black 3800
+5278 a Fr(31)p Black eop
+%%Page: 32 32
+32 31 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(creating)f(the)i(nodes)e(of)h(the)g(tree)g(by)g
+(applying)f(the)h Fq(new)g Fv(operator)e(the)j(nodes)e(are)h(produced)e
+(by)i(duplicating)e(the)396 687 y(e)o(x)o(emplars.)h(As)h(object)g
+(duplication)f(preserv)o(es)g(the)h(class)h(of)f(the)g(object,)f(one)h
+(can)g(create)g(fresh)g(objects)g(of)g(e)n(v)o(ery)396
+795 y(class)h(for)f(which)g(pre)n(viously)e(an)i(e)o(x)o(emplar)e(has)j
+(been)e(re)o(gistered.)396 944 y(Ex)o(emplars)g(are)h(meant)g(as)h
+(objects)f(without)f(contents,)g(the)h(only)g(interesting)f(thing)g(is)
+j(that)e(e)o(x)o(emplars)e(are)396 1052 y(instances)i(of)g(a)h(certain)
+f(class.)g(The)g(creation)f(of)h(an)h(e)o(x)o(emplar)d(for)h(an)h
+(element)g(node)f(can)h(be)g(done)f(by:)396 1232 y Fq(let)45
+b(element_exemplar)d(=)i(new)h(element_impl)e(extension_exemplar)396
+1423 y Fv(And)20 b(a)h(data)f(node)f(e)o(x)o(emplar)f(is)j(created)f
+(by:)396 1603 y Fq(let)45 b(data_exemplar)d(=)j(new)f(data_impl)g
+(extension_exemplar)396 1794 y Fv(The)20 b(classes)h
+Fq(element_impl)e Fv(and)h Fq(data_impl)f Fv(are)h(de\002ned)f(in)i
+(the)f(module)f Fq(Pxp_document)p Fv(.)f(The)396 1902
+y(constructors)h(initialize)h(the)g(fresh)g(objects)g(as)h(empty)e
+(objects,)h(i.e.)g(without)g(children,)e(without)i(data)g(contents,)f
+(and)396 2010 y(so)i(on.)e(The)h Fq(extension_exemplar)e
+Fv(is)j(the)f(initial)h(e)o(xtension)e(object)g(the)h(e)o(x)o(emplars)f
+(are)h(associated)g(with.)396 2160 y(Once)g(the)g(e)o(x)o(emplars)f
+(are)h(created)f(and)h(stored)g(some)n(where)f(\(e.g.)g(in)h(a)h(hash)f
+(table\),)f(you)h(can)g(tak)o(e)g(an)g(e)o(x)o(emplar)396
+2268 y(and)g(create)g(a)g(concrete)f(instance)h(\(with)g(contents\))f
+(by)h(duplicating)e(it.)j(As)g(user)f(of)g(the)g(parser)g(you)f(are)h
+(normally)396 2376 y(not)g(concerned)e(with)i(this)h(as)g(this)g(is)g
+(part)f(of)g(the)g(internal)f(logic)h(of)g(the)g(parser)m(,)f(b)n(ut)h
+(as)h(background)c(kno)n(wledge)h(it)396 2483 y(is)j(w)o(orthwhile)e
+(to)i(mention)e(that)h(the)g(tw)o(o)h(methods)e Fq(create_element)f
+Fv(and)i Fq(create_data)f Fv(actually)g(perform)396 2591
+y(the)h(duplication)f(of)g(the)i(e)o(x)o(emplar)d(for)h(which)h(the)o
+(y)f(are)h(in)m(v)n(ok)o(ed,)e(additionally)g(apply)i(modi\002cations)e
+(to)j(the)f(clone,)396 2699 y(and)g(\002nally)g(return)f(the)h(ne)n(w)g
+(object.)f(Moreo)o(v)o(er)m(,)f(the)i(e)o(xtension)e(object)i(is)h
+(copied,)e(too,)h(and)f(the)i(ne)n(w)f(node)f(object)396
+2807 y(is)i(associated)f(with)g(the)g(fresh)g(e)o(xtension)e(object.)i
+(Note)g(that)g(this)g(is)h(the)f(reason)g(why)f(e)n(v)o(ery)g(e)o
+(xtension)f(object)i(must)396 2915 y(ha)n(v)o(e)g(a)g
+Fq(clone)g Fv(method.)396 3065 y(The)g(con\002guration)e(of)i(the)g
+(set)h(of)f(e)o(x)o(emplars)e(is)j(passed)f(to)h(the)f
+Fq(parse_document_entity)d Fv(function)i(as)i(third)396
+3173 y(ar)o(gument.)d(In)i(our)f(e)o(xample,)g(this)h(ar)o(gument)e
+(can)i(be)g(set)h(up)f(as)h(follo)n(ws:)396 3353 y Fq(let)45
+b(spec)f(=)486 3450 y(make_spec_from_alist)576 3547 y(~data_exemplar:)
+535 b(\(new)44 b(data_impl)g(\(new)g(null_extension\)\))576
+3644 y(~default_element_exemplar:)c(\(new)k(element_impl)f(\(new)h
+(null_extension\)\))576 3741 y(~element_alist:)710 3838
+y([)h("a",)89 b(new)44 b(element_impl)f(\(new)h(eltype_a\);)800
+3935 y("b",)89 b(new)44 b(element_impl)f(\(new)h(eltype_b\);)800
+4033 y("c",)89 b(new)44 b(element_impl)f(\(new)h(eltype_c\);)710
+4130 y(])576 4227 y(\(\))396 4418 y Fv(The)20 b Fq(~element_alist)f
+Fv(function)f(ar)o(gument)g(de\002nes)i(the)g(mapping)e(from)h(element)
+h(types)g(to)g(e)o(x)o(emplars)f(as)396 4526 y(associati)n(v)o(e)h
+(list.)h(The)f(ar)o(gument)e Fq(~data_exemplar)g Fv(speci\002es)j(the)f
+(e)o(x)o(emplar)e(for)i(data)g(nodes,)f(and)h(the)396
+4634 y Fq(~default_element_exemplar)d Fv(is)k(used)f(whene)n(v)o(er)e
+(the)i(parser)g(\002nds)g(an)g(element)g(type)f(for)h(which)g(the)396
+4742 y(associati)n(v)o(e)g(list)h(does)f(not)g(de\002ne)g(an)g(e)o(x)o
+(emplar)-5 b(.)p Black 3800 5278 a Fr(32)p Black eop
+%%Page: 33 33
+33 32 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(The)g(con\002guration)e(is)j(no)n(w)e(complete.)g(Y)
+-9 b(ou)20 b(can)g(still)h(use)g(the)f(same)g(parsing)f(functions,)g
+(only)g(the)h(initialization)g(is)396 687 y(a)h(bit)f(dif)n(ferent.)f
+(F)o(or)g(e)o(xample,)g(call)i(the)f(parser)f(by:)396
+867 y Fq(let)45 b(d)f(=)h(parse_document_entity)c(default_config)i
+(\(from_file)g("doc.xml"\))g(spec)396 1058 y Fv(Note)20
+b(that)h(the)f(resulting)f(document)f Fq(d)j Fv(has)f(a)h(usable)f
+(type;)g(especially)f(the)i Fq(print)f Fv(method)e(we)j(added)e(is)i
+(visible.)396 1166 y(So)g(you)e(can)h(print)g(your)e(document)h(by)396
+1346 y Fq(d)45 b(#)g(root)f(#)g(extension)g(#)g(print)g(stdout)396
+1578 y Fv(This)21 b(object-oriented)c(approach)h(looks)i(rather)f
+(complicated;)g(this)h(is)i(mostly)d(caused)h(by)g(w)o(orking)e(around)
+h(some)396 1686 y(problems)g(of)h(the)g(strict)h(typing)e(system)h(of)g
+(O'Caml.)g(Some)g(auxiliary)f(concepts)g(such)h(as)h(e)o(xtensions)e
+(were)396 1794 y(needed,)g(b)n(ut)h(the)g(practical)g(consequences)e
+(are)i(lo)n(w)-5 b(.)20 b(In)g(the)g(ne)o(xt)f(section,)h(one)g(of)g
+(the)g(e)o(xamples)f(of)h(the)396 1902 y(distrib)n(ution)f(is)i(e)o
+(xplained,)d(a)j(con)m(v)o(erter)d(from)h Fr(r)m(eadme)h
+Fv(documents)e(to)i(HTML.)-2 2321 y Fx(2.4.)39 b(Example:)f(An)h(HTML)f
+(bac)m(kend)g(f)m(or)h(the)g Fd(readme)44 b Fx(DTD)396
+2501 y Fv(The)20 b(con)m(v)o(erter)e(from)h Fr(r)m(eadme)h
+Fv(documents)e(to)i(HTML)g(documents)f(follo)n(ws)h(strictly)g(the)g
+(approach)e(to)j(de\002ne)e(one)396 2609 y(class)i(per)f(element)g
+(type.)f(The)h(HTML)g(code)g(is)h(similar)f(to)g(the)h
+Fr(r)m(eadme)e Fv(source,)g(because)h(of)g(this)h(most)f(elements)396
+2716 y(can)g(be)g(con)m(v)o(erted)e(in)i(the)g(follo)n(wing)f(w)o(ay:)h
+(Gi)n(v)o(en)g(the)g(input)f(element)396 2897 y Fq(<e>content</e>)396
+3088 y Fv(the)h(con)m(v)o(ersion)e(te)o(xt)i(is)h(the)f(concatenation)e
+(of)i(a)h(computed)d(pre\002x,)h(the)h(recursi)n(v)o(ely)f(con)m(v)o
+(erted)e(content,)i(and)h(a)396 3195 y(computed)e(suf)n(\002x.)396
+3345 y(Only)i(one)g(element)f(type)h(cannot)f(be)h(handled)f(by)h(this)
+g(scheme:)g Fq(footnote)p Fv(.)f(F)o(ootnotes)g(are)h(collected)g
+(while)g(the)o(y)396 3453 y(are)g(found)f(in)h(the)g(input)g(te)o(xt,)g
+(and)f(the)o(y)h(are)g(printed)f(after)h(the)g(main)g(te)o(xt)g(has)g
+(been)g(con)m(v)o(erted)d(and)j(printed.)-2 3781 y Fp(2.4.1.)35
+b(Header)396 4021 y Fq(open)44 b(Pxp_types)396 4118 y(open)g
+(Pxp_document)-2 4571 y Fp(2.4.2.)35 b(T)-7 b(ype)34
+b(dec)n(larations)396 4811 y Fq(class)44 b(type)g(footnote_printer)f(=)
+p Black 3800 5278 a Fr(33)p Black eop
+%%Page: 34 34
+34 33 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 579 a Fq(object)576 676 y(method)43 b(footnote_to_html)g(:)h
+(store_type)f(-)p Fo(>)i Fq(out_channel)e(-)p Fo(>)h
+Fq(unit)486 773 y(end)396 967 y(and)h(store_type)e(=)486
+1065 y(object)576 1162 y(method)g(alloc_footnote)g(:)i
+(footnote_printer)d(-)p Fo(>)i Fq(int)576 1259 y(method)f
+(print_footnotes)g(:)h(out_channel)f(-)p Fo(>)i Fq(unit)486
+1356 y(end)396 1453 y(;;)-2 1906 y Fp(2.4.3.)35 b(Class)g
+Fc(store)396 2073 y Fv(The)20 b Fq(store)g Fv(is)h(a)g(container)d(for)
+i(footnotes.)f(Y)-9 b(ou)19 b(can)h(add)g(a)g(footnote)f(by)h(in)m(v)n
+(oking)e Fq(alloc_footnote)p Fv(;)g(the)396 2181 y(ar)o(gument)g(is)j
+(an)f(object)g(of)g(the)g(class)h Fq(footnote_printer)p
+Fv(,)d(the)i(method)f(returns)g(the)i(number)d(of)i(the)g(footnote.)396
+2289 y(The)g(interesting)f(property)f(of)i(a)h(footnote)d(is)k(that)e
+(it)h(can)f(be)g(con)m(v)o(erted)d(to)k(HTML,)e(so)i(a)g
+Fq(footnote_printer)d Fv(is)396 2397 y(an)i(object)g(with)g(a)h(method)
+e Fq(footnote_to_html)p Fv(.)f(The)i(class)h Fq(footnote)e
+Fv(which)h(is)h(de\002ned)e(belo)n(w)h(has)g(a)396 2505
+y(compatible)f(method)g Fq(footnote_to_html)f Fv(such)i(that)g(objects)
+g(created)f(from)h(it)h(can)f(be)g(used)g(as)396 2613
+y Fq(footnote_printer)p Fv(s.)396 2763 y(The)g(other)f(method,)g
+Fq(print_footnotes)f Fv(prints)i(the)g(footnotes)f(as)i(de\002nition)e
+(list,)i(and)f(is)h(typically)e(in)m(v)n(ok)o(ed)396
+2870 y(after)h(the)g(main)g(material)g(of)g(the)g(page)g(has)g(already)
+f(been)h(printed.)e(Ev)o(ery)h(item)h(of)g(the)h(list)g(is)g(printed)e
+(by)396 2978 y Fq(footnote_to_html)p Fv(.)396 3200 y
+Fq(class)44 b(store)g(=)486 3297 y(object)g(\(self\))576
+3491 y(val)g(mutable)g(footnotes)f(=)i(\()f([])h(:)f(\(int)h(*)f
+(footnote_printer\))e(list)i(\))576 3589 y(val)g(mutable)g
+(next_footnote_number)d(=)k(1)576 3783 y(method)e(alloc_footnote)g(n)i
+(=)665 3880 y(let)g(number)e(=)i(next_footnote_number)d(in)665
+3977 y(next_footnote_number)g Fo(<)p Fq(-)i(number+1;)665
+4074 y(footnotes)g Fo(<)p Fq(-)g(footnotes)f(@)i([)g(number,)e(n)i(];)
+665 4171 y(number)576 4366 y(method)e(print_footnotes)g(ch)h(=)665
+4463 y(if)h(footnotes)e Fo(<>)h Fq([])h(then)f(begin)396
+4560 y(output_string)f(ch)h(")p Fo(<)p Fq(hr)g(align=left)g
+(noshade=noshade)e(width=\\"30\045\\")p Fo(>)p Fq(\\n";)396
+4657 y(output_string)h(ch)h(")p Fo(<)p Fq(dl)p Fo(>)p
+Fq(\\n";)396 4754 y(List.iter)486 4851 y(\(fun)g(\(_,n\))g(-)p
+Fo(>)p Black 3800 5278 a Fr(34)p Black eop
+%%Page: 35 35
+35 34 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 620 579 a Fq(n)45 b(#)g(footnote_to_html)d(\(self)i(:)h
+(#store_type)e(:)p Fo(>)h Fq(store_type\))f(ch\))486
+676 y(footnotes;)396 773 y(output_string)g(ch)h(")p Fo(<)p
+Fq(/dl)p Fo(>)p Fq(\\n";)665 870 y(end)486 1065 y(end)396
+1162 y(;;)-2 1614 y Fp(2.4.4.)35 b(Function)f Fc(escape_html)396
+1782 y Fv(This)21 b(function)d(con)m(v)o(erts)h(the)h(characters)f
+Fm(<)p Fv(,)h Fm(>)p Fv(,)g(&,)g(and)g(")h(to)f(their)g(HTML)g
+(representation.)e(F)o(or)h(e)o(xample,)396 1890 y Fq(escape_html)43
+b(")p Fo(<>)p Fq(")h(=)h("<>")p Fv(.)19 b(Other)g(characters)h
+(are)g(left)g(unchanged.)396 2070 y Fq(let)45 b(escape_html)e(s)h(=)486
+2167 y(Str.global_substitute)576 2264 y(\(Str.regexp)f(")p
+Fo(<)p Fq(\\\\|)p Fo(>)p Fq(\\\\|&\\\\|\\""\))576 2362
+y(\(fun)h(s)g(-)p Fo(>)665 2459 y Fq(match)g(Str.matched_string)e(s)j
+(with)755 2556 y(")p Fo(<)p Fq(")f(-)p Fo(>)h Fq("<")665
+2653 y(|)g(")p Fo(>)p Fq(")f(-)p Fo(>)h Fq(">")665
+2750 y(|)g("&")f(-)p Fo(>)h Fq("&")665 2847 y(|)g("\\"")f(-)p
+Fo(>)g Fq(""")665 2944 y(|)h(_)g(-)p Fo(>)f Fq(assert)g(false\))
+576 3042 y(s)396 3139 y(;;)-2 3591 y Fp(2.4.5.)35 b(Vir)r(tual)f(c)n
+(lass)h Fc(shared)396 3759 y Fv(This)21 b(virtual)e(class)i(is)g(the)g
+(abstract)f(superclass)g(of)f(the)i(e)o(xtension)d(classes)k(sho)n(wn)d
+(belo)n(w)-5 b(.)19 b(It)i(de\002nes)f(the)g(standard)396
+3867 y(methods)f Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)g
+Fq(set_node)p Fv(,)f(and)g(declares)h(the)g(type)g(of)g(the)g(virtual)g
+(method)e Fq(to_html)p Fv(.)i(This)396 3975 y(method)f(recursi)n(v)o
+(ely)f(tra)n(v)o(erses)i(the)g(whole)g(element)g(tree,)g(and)f(prints)h
+(the)g(con)m(v)o(erted)e(HTML)i(code)f(to)i(the)f(output)396
+4083 y(channel)f(passed)h(as)h(second)f(ar)o(gument.)d(The)j(\002rst)h
+(ar)o(gument)d(is)j(the)f(reference)f(to)h(the)g(global)f
+Fq(store)h Fv(object)g(which)396 4191 y(collects)h(the)f(footnotes.)396
+4371 y Fq(class)44 b(virtual)g(shared)g(=)486 4468 y(object)g(\(self\))
+576 4662 y(\(*)g(--)h(default_ext)e(--)h(*\))576 4857
+y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(shared)f(node)g(option\))p
+Black 3800 5278 a Fr(35)p Black eop
+%%Page: 36 36
+36 35 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 576 676 a Fq(method)43 b(clone)h(=)h({)p Fo(<)f(>)p
+Fq(})576 773 y(method)f(node)i(=)665 870 y(match)f(node)g(with)845
+967 y(None)g(-)p Fo(>)934 1065 y Fq(assert)g(false)755
+1162 y(|)h(Some)f(n)g(-)p Fo(>)h Fq(n)576 1259 y(method)e(set_node)h(n)
+h(=)665 1356 y(node)f Fo(<)p Fq(-)h(Some)f(n)576 1550
+y(\(*)g(--)h(virtual)e(--)i(*\))576 1745 y(method)e(virtual)h(to_html)g
+(:)g(store)g(-)p Fo(>)h Fq(out_channel)e(-)p Fo(>)h Fq(unit)486
+1939 y(end)396 2036 y(;;)-2 2489 y Fp(2.4.6.)35 b(Class)g
+Fc(only_data)396 2656 y Fv(This)21 b(class)g(de\002nes)f
+Fq(to_html)f Fv(such)h(that)h(the)f(character)f(data)h(of)g(the)g
+(current)f(node)g(is)i(con)m(v)o(erted)d(to)i(HTML.)g(Note)396
+2764 y(that)h Fq(self)f Fv(is)h(an)f(e)o(xtension)f(object,)g
+Fq(self)44 b(#)h(node)20 b Fv(is)h(the)f(node)f(object,)h(and)f
+Fq(self)45 b(#)f(node)g(#)h(data)20 b Fv(returns)396
+2872 y(the)g(character)f(data)h(of)g(the)h(node.)396
+3052 y Fq(class)44 b(only_data)g(=)486 3149 y(object)g(\(self\))576
+3247 y(inherit)f(shared)576 3441 y(method)g(to_html)h(store)g(ch)h(=)
+665 3538 y(output_string)e(ch)h(\(escape_html)f(\(self)h(#)h(node)f(#)h
+(data\)\))486 3635 y(end)396 3732 y(;;)-2 4185 y Fp(2.4.7.)35
+b(Class)g Fc(readme)396 4353 y Fv(This)21 b(class)g(con)m(v)o(erts)d
+(elements)i(of)g(type)g Fq(readme)g Fv(to)g(HTML.)g(Such)f(an)h
+(element)g(is)h(\(by)f(de\002nition\))e(al)o(w)o(ays)j(the)396
+4461 y(root)f(element)f(of)h(the)g(document.)e(First,)j(the)f(HTML)g
+(header)f(is)j(printed;)d(the)h Fq(title)g Fv(attrib)n(ute)f(of)h(the)h
+(element)396 4568 y(determines)e(the)i(title)f(of)g(the)h(HTML)f(page.)
+f(Some)h(aspects)g(of)g(the)g(HTML)g(page)g(can)g(be)g(con\002gured)e
+(by)h(setting)396 4676 y(certain)h(parameter)f(entities,)h(for)g(e)o
+(xample)e(the)i(background)d(color)m(,)i(the)h(te)o(xt)g(color)m(,)f
+(and)h(link)g(colors.)f(After)h(the)396 4784 y(header)m(,)f(the)h
+Fq(body)g Fv(tag,)g(and)g(the)g(headline)f(ha)n(v)o(e)g(been)h
+(printed,)f(the)h(contents)f(of)h(the)g(page)g(are)g(con)m(v)o(erted)e
+(by)p Black 3798 5278 a Fr(36)p Black eop
+%%Page: 37 37
+37 36 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(in)m(v)n(oking)e Fq(to_html)i Fv(on)g(all)g
+(children)f(of)h(the)g(current)f(node)g(\(which)h(is)h(the)f(root)f
+(node\).)g(Then,)g(the)h(footnotes)f(are)396 687 y(appended)f(to)j
+(this)f(by)g(telling)g(the)g(global)f Fq(store)h Fv(object)g(to)g
+(print)g(the)g(footnotes.)f(Finally)-5 b(,)19 b(the)h(end)g(tags)g(of)g
+(the)396 795 y(HTML)g(pages)g(are)g(printed.)396 944
+y(This)h(class)g(is)g(an)f(e)o(xample)f(ho)n(w)g(to)i(access)g(the)f(v)
+n(alue)f(of)h(an)g(attrib)n(ute:)g(The)g(v)n(alue)g(is)h(determined)d
+(by)i(in)m(v)n(oking)396 1052 y Fq(self)44 b(#)h(node)f(#)h(attribute)e
+("title")p Fv(.)20 b(As)h(this)f(attrib)n(ute)g(has)g(been)g(declared)f
+(as)i(CD)m(A)-9 b(T)h(A)20 b(and)g(as)h(being)396 1160
+y(required,)d(the)j(v)n(alue)e(has)i(al)o(w)o(ays)f(the)g(form)g
+Fq(Value)44 b(s)20 b Fv(where)g Fq(s)g Fv(is)h(the)g(string)e(v)n(alue)
+h(of)g(the)g(attrib)n(ute.)396 1310 y(Y)-9 b(ou)20 b(can)g(also)g(see)h
+(ho)n(w)f(entity)g(contents)f(can)h(be)g(accessed.)g(A)h(parameter)e
+(entity)g(object)h(can)g(be)g(look)o(ed)f(up)h(by)396
+1417 y Fq(self)44 b(#)h(node)f(#)h(dtd)f(#)h(par_entity)e("name")p
+Fv(,)20 b(and)f(by)h(in)m(v)n(oking)e Fq(replacement_text)g
+Fv(the)i(v)n(alue)g(of)396 1525 y(the)g(entity)g(is)h(returned)e(after)
+h(inner)f(parameter)g(and)g(character)g(entities)i(ha)n(v)o(e)f(been)f
+(processed.)g(Note)h(that)g(you)396 1633 y(must)g(use)h
+Fq(gen_entity)e Fv(instead)h(of)g Fq(par_entity)f Fv(to)h(access)h
+(general)e(entities.)396 1855 y Fq(class)44 b(readme)g(=)486
+1952 y(object)g(\(self\))576 2049 y(inherit)f(shared)576
+2244 y(method)g(to_html)h(store)g(ch)h(=)665 2341 y(\(*)g(output)f
+(header)f(*\))665 2438 y(output_string)396 2535 y(ch)i(")p
+Fo(<)p Fq(!DOCTYPE)e(HTML)h(PUBLIC)g(\\"-//W3C//DTD)e(HTML)j(3.2)f
+(Final//EN\\")p Fo(>)p Fq(";)665 2632 y(output_string)396
+2729 y(ch)h(")p Fo(<)p Fq(!-)f(WARNING!)f(This)h(is)h(a)g(generated)e
+(file,)h(do)g(not)h(edit!)f(-)p Fo(>)p Fq(\\n";)665 2826
+y(let)h(title)f(=)396 2923 y(match)g(self)g(#)h(node)f(#)h(attribute)e
+("title")h(with)576 3021 y(Value)g(s)g(-)p Fo(>)h Fq(s)486
+3118 y(|)g(_)f(-)p Fo(>)h Fq(assert)e(false)665 3215
+y(in)665 3312 y(let)i(html_header,)d(_)j(=)396 3409 y(try)g(\(self)f(#)
+g(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:header"\))934
+3506 y(#)i(replacement_text)396 3603 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 3701 y(let)h(html_trailer,)d(_)j(=)396
+3798 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:trailer"\))934 3895 y(#)i(replacement_text)396
+3992 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+4089 y(let)h(html_bgcolor,)d(_)j(=)396 4186 y(try)g(\(self)f(#)g(node)g
+(#)h(dtd)f(#)h(par_entity)e("readme:html:bgcolor"\))934
+4283 y(#)i(replacement_text)396 4380 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("white",)f(false)h(in)665 4478 y(let)h(html_textcolor,)d(_)j
+(=)396 4575 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:textcolor"\))934 4672 y(#)i(replacement_text)396
+4769 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+4866 y(let)h(html_alinkcolor,)d(_)i(=)p Black 3797 5278
+a Fr(37)p Black eop
+%%Page: 38 38
+38 37 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fq(try)45 b(\(self)f(#)g(node)g(#)h(dtd)f(#)h
+(par_entity)e("readme:html:alinkcolor"\))934 676 y(#)i
+(replacement_text)396 773 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 870 y(let)h(html_vlinkcolor,)d(_)i(=)396
+967 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:vlinkcolor"\))934 1065 y(#)i(replacement_text)396
+1162 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+1259 y(let)h(html_linkcolor,)d(_)j(=)396 1356 y(try)g(\(self)f(#)g
+(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:linkcolor"\))934
+1453 y(#)i(replacement_text)396 1550 y(with)f(WF_error)g(_)h(-)p
+Fo(>)f Fq("",)g(false)g(in)665 1647 y(let)h(html_background,)d(_)i(=)
+396 1745 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e
+("readme:html:background"\))934 1842 y(#)i(replacement_text)396
+1939 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665
+2133 y(output_string)f(ch)h(")p Fo(<)p Fq(html)p Fo(><)p
+Fq(header)p Fo(><)p Fq(title)p Fo(>)p Fq(\\n";)665 2230
+y(output_string)f(ch)h(\(escape_html)f(title\);)665 2327
+y(output_string)g(ch)h(")p Fo(<)p Fq(/title)p Fo(><)p
+Fq(/header)p Fo(>)p Fq(\\n";)665 2424 y(output_string)f(ch)h(")p
+Fo(<)p Fq(body)g(";)665 2522 y(List.iter)396 2619 y(\(fun)g
+(\(name,value\))f(-)p Fo(>)531 2716 y Fq(if)h(value)g
+Fo(<>)h Fq("")f(then)620 2813 y(output_string)f(ch)i(\(name)f(^)g
+("=\\"")g(^)h(escape_html)e(value)h(^)h("\\")f("\)\))396
+2910 y([)h("bgcolor",)178 b(html_bgcolor;)486 3007 y("text",)313
+b(html_textcolor;)486 3104 y("link",)g(html_linkcolor;)486
+3202 y("alink",)268 b(html_alinkcolor;)486 3299 y("vlink",)g
+(html_vlinkcolor;)396 3396 y(];)665 3493 y(output_string)43
+b(ch)h(")p Fo(>)p Fq(\\n";)665 3590 y(output_string)f(ch)h
+(html_header;)665 3687 y(output_string)f(ch)h(")p Fo(<)p
+Fq(h1)p Fo(>)p Fq(";)665 3784 y(output_string)f(ch)h(\(escape_html)f
+(title\);)665 3882 y(output_string)g(ch)h(")p Fo(<)p
+Fq(/h1)p Fo(>)p Fq(\\n";)665 3979 y(\(*)h(process)e(main)i(content:)e
+(*\))665 4076 y(List.iter)396 4173 y(\(fun)h(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+4270 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 4367
+y(\(*)g(now)f(process)g(footnotes)f(*\))665 4464 y(store)h(#)h
+(print_footnotes)d(ch;)665 4561 y(\(*)j(trailer)e(*\))665
+4659 y(output_string)g(ch)h(html_trailer;)665 4756 y(output_string)f
+(ch)h(")p Fo(<)p Fq(/html)p Fo(>)p Fq(\\n";)p Black 3800
+5278 a Fr(38)p Black eop
+%%Page: 39 39
+39 38 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 486 579 a Fq(end)396 676 y(;;)-2 1129 y Fp(2.4.8.)35
+b(Classes)h Fc(section)p Fp(,)31 b Fc(sect1)p Fp(,)g
+Fc(sect2)p Fp(,)g(and)j Fc(sect3)396 1296 y Fv(As)21
+b(the)f(con)m(v)o(ersion)e(process)i(is)h(v)o(ery)e(similar)m(,)h(the)g
+(con)m(v)o(ersion)d(classes)22 b(of)e(the)g(three)g(section)f(le)n(v)o
+(els)i(are)f(deri)n(v)o(ed)396 1404 y(from)f(the)i(more)e(general)g
+Fq(section)h Fv(class.)h(The)e(HTML)h(code)g(of)g(the)g(section)g(le)n
+(v)o(els)g(only)f(dif)n(fers)h(in)g(the)g(type)g(of)396
+1512 y(the)g(headline,)f(and)h(because)f(of)h(this)h(the)f(classes)i
+(describing)c(the)i(section)g(le)n(v)o(els)g(can)g(be)h(computed)d(by)i
+(replacing)396 1620 y(the)g(class)i(ar)o(gument)17 b
+Fq(the_tag)j Fv(of)g Fq(section)g Fv(by)f(the)i(HTML)e(name)h(of)g(the)
+g(headline)f(tag.)396 1770 y(Section)h(elements)g(are)g(con)m(v)o
+(erted)e(to)i(HTML)g(by)g(printing)e(a)j(headline)e(and)h(then)f(con)m
+(v)o(erting)f(the)i(contents)f(of)h(the)396 1878 y(element)g(recursi)n
+(v)o(ely)-5 b(.)18 b(More)h(precisely)-5 b(,)19 b(the)h(\002rst)h
+(sub-element)e(is)i(al)o(w)o(ays)f(a)h Fq(title)f Fv(element,)f(and)h
+(the)g(other)396 1985 y(elements)g(are)g(the)g(contents)g(of)g(the)g
+(section.)g(This)g(structure)f(is)j(declared)c(in)j(the)f(DTD,)g(and)g
+(it)h(is)g(guaranteed)d(that)396 2093 y(the)i(document)f(matches)g(the)
+i(DTD.)f(Because)g(of)g(this)h(the)f(title)h(node)e(can)h(be)g
+(separated)f(from)g(the)h(rest)h(without)f(an)o(y)396
+2201 y(checks.)396 2351 y(Both)g(the)h(title)g(node,)e(and)g(the)h
+(body)f(nodes)h(are)g(then)f(con)m(v)o(erted)f(to)i(HTML)g(by)g
+(calling)g Fq(to_html)f Fv(on)h(them.)396 2572 y Fq(class)44
+b(section)g(the_tag)g(=)486 2670 y(object)g(\(self\))576
+2767 y(inherit)f(shared)576 2961 y(val)h(tag)g(=)h(the_tag)576
+3155 y(method)e(to_html)h(store)g(ch)h(=)665 3252 y(let)g(sub_nodes)e
+(=)i(self)f(#)g(node)h(#)f(sub_nodes)g(in)665 3350 y(match)g(sub_nodes)
+g(with)486 3447 y(title_node)f(::)i(rest)f(-)p Fo(>)576
+3544 y Fq(output_string)e(ch)j(\(")p Fo(<)p Fq(")f(^)g(tag)h(^)f(")p
+Fo(>)p Fq(\\n"\);)576 3641 y(title_node)f(#)h(extension)g(#)g(to_html)g
+(store)g(ch;)576 3738 y(output_string)e(ch)j(\("\\n)p
+Fo(<)p Fq(/")e(^)i(tag)f(^)h(")p Fo(>)p Fq("\);)576 3835
+y(List.iter)665 3932 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)
+i(to_html)e(store)h(ch\))665 4029 y(rest)396 4127 y(|)h(_)g(-)p
+Fo(>)576 4224 y Fq(assert)e(false)486 4321 y(end)396
+4418 y(;;)396 4612 y(class)h(sect1)g(=)h(section)f("h1";;)396
+4709 y(class)g(sect2)g(=)h(section)f("h3";;)396 4807
+y(class)g(sect3)g(=)h(section)f("h4";;)p Black 3800 5278
+a Fr(39)p Black eop
+%%Page: 40 40
+40 39 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.9.)35 b(Classes)h Fc(map_tag)p
+Fp(,)31 b Fc(p)p Fp(,)i Fc(em)p Fp(,)f Fc(ul)p Fp(,)h
+Fc(li)396 751 y Fv(Se)n(v)o(eral)20 b(element)f(types)h(are)g(con)m(v)o
+(erted)e(to)i(HTML)g(by)g(simply)g(mapping)e(them)i(to)g(corresponding)
+d(HTML)396 859 y(element)j(types.)g(The)f(class)j Fq(map_tag)d
+Fv(implements)g(this,)i(and)f(the)g(class)h(ar)o(gument)d
+Fq(the_target_tag)396 967 y Fv(determines)h(the)i(tag)f(name)f(to)i
+(map)e(to.)h(The)g(output)f(consists)i(of)f(the)g(start)h(tag,)f(the)g
+(recursi)n(v)o(ely)e(con)m(v)o(erted)g(inner)396 1075
+y(elements,)i(and)g(the)g(end)f(tag.)396 1255 y Fq(class)44
+b(map_tag)g(the_target_tag)e(=)486 1352 y(object)i(\(self\))576
+1449 y(inherit)f(shared)576 1643 y(val)h(target_tag)f(=)i
+(the_target_tag)576 1838 y(method)e(to_html)h(store)g(ch)h(=)665
+1935 y(output_string)e(ch)h(\(")p Fo(<)p Fq(")g(^)h(target_tag)e(^)i(")
+p Fo(>)p Fq(\\n"\);)665 2032 y(List.iter)396 2129 y(\(fun)f(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+2226 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 2323
+y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/")g(^)h(target_tag)e(^)h(")p
+Fo(>)p Fq("\);)486 2420 y(end)396 2518 y(;;)396 2712
+y(class)g(p)h(=)g(map_tag)e("p";;)396 2809 y(class)h(em)h(=)f(map_tag)g
+("b";;)396 2906 y(class)g(ul)h(=)f(map_tag)g("ul";;)396
+3003 y(class)g(li)h(=)f(map_tag)g("li";;)-2 3456 y Fp(2.4.10.)36
+b(Class)f Fc(br)396 3624 y Fv(Element)20 b(of)g(type)f
+Fq(br)i Fv(are)f(mapped)f(to)h(the)g(same)g(HTML)g(type.)g(Note)g(that)
+g(HTML)g(forbids)f(the)h(end)g(tag)g(of)g Fq(br)p Fv(.)396
+3804 y Fq(class)44 b(br)h(=)486 3901 y(object)f(\(self\))576
+3998 y(inherit)f(shared)576 4192 y(method)g(to_html)h(store)g(ch)h(=)
+665 4289 y(output_string)e(ch)h(")p Fo(<)p Fq(br)p Fo(>)p
+Fq(\\n";)665 4387 y(List.iter)396 4484 y(\(fun)g(n)h(-)p
+Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396
+4581 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)486 4678
+y(end)396 4775 y(;;)p Black 3800 5278 a Fr(40)p Black
+eop
+%%Page: 41 41
+41 40 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.11.)36 b(Class)f Fc(code)396 751
+y Fv(The)20 b Fq(code)g Fv(type)g(is)h(con)m(v)o(erted)d(to)i(a)h
+Fq(pre)f Fv(section)g(\(preformatted)d(te)o(xt\).)i(As)i(the)g(meaning)
+d(of)i(tabs)h(is)g(unspeci\002ed)e(in)396 859 y(HTML,)h(tabs)g(are)h(e)
+o(xpanded)c(to)k(spaces.)396 1039 y Fq(class)44 b(code)g(=)486
+1136 y(object)g(\(self\))576 1233 y(inherit)f(shared)576
+1427 y(method)g(to_html)h(store)g(ch)h(=)665 1525 y(let)g(data)f(=)g
+(self)h(#)f(node)g(#)h(data)f(in)665 1622 y(\(*)h(convert)e(tabs)i(*\))
+665 1719 y(let)g(l)f(=)h(String.length)e(data)h(in)665
+1816 y(let)h(rec)f(preprocess)f(i)i(column)f(=)396 1913
+y(\(*)h(this)f(is)g(very)h(ineffective)e(but)h(comprehensive:)e(*\))396
+2010 y(if)j(i)f Fo(<)h Fq(l)g(then)486 2107 y(match)f(data.[i])f(with)
+665 2205 y('\\t')h(-)p Fo(>)396 2302 y Fq(let)h(n)f(=)h(8)g(-)f
+(\(column)g(mod)g(8\))h(in)396 2399 y(String.make)e(n)i(')g(')f(^)h
+(preprocess)e(\(i+1\))h(\(column)g(+)g(n\))576 2496 y(|)g('\\n')g(-)p
+Fo(>)396 2593 y Fq("\\n")g(^)h(preprocess)e(\(i+1\))h(0)576
+2690 y(|)g(c)h(-)p Fo(>)396 2787 y Fq(String.make)e(1)i(c)g(^)f
+(preprocess)f(\(i+1\))h(\(column)g(+)h(1\))396 2884 y(else)486
+2982 y("")665 3079 y(in)665 3176 y(output_string)e(ch)h(")p
+Fo(<)p Fq(p)p Fo(><)p Fq(pre)p Fo(>)p Fq(";)665 3273
+y(output_string)f(ch)h(\(escape_html)f(\(preprocess)g(0)i(0\)\);)665
+3370 y(output_string)e(ch)h(")p Fo(<)p Fq(/pre)p Fo(><)p
+Fq(/p)p Fo(>)p Fq(";)486 3564 y(end)396 3662 y(;;)-2
+4114 y Fp(2.4.12.)36 b(Class)f Fc(a)396 4282 y Fv(Hyperlinks,)19
+b(e)o(xpressed)g(by)g(the)i Fq(a)f Fv(element)g(type,)f(are)h(con)m(v)o
+(erted)e(to)i(the)g(HTML)g Fq(a)h Fv(type.)e(If)i(the)f(tar)o(get)f(of)
+h(the)396 4390 y(hyperlink)e(is)j(gi)n(v)o(en)d(by)i
+Fq(href)p Fv(,)g(the)g(URL)g(of)g(this)g(attrib)n(ute)g(can)g(be)g
+(used)g(directly)-5 b(.)18 b(Alternati)n(v)o(ely)-5 b(,)18
+b(the)i(tar)o(get)f(can)h(be)396 4498 y(gi)n(v)o(en)f(by)h
+Fq(readmeref)f Fv(in)i(which)e(case)i(the)f(".html")g(suf)n(\002x)f
+(must)i(be)f(added)f(to)h(the)g(\002le)h(name.)396 4647
+y(Note)f(that)h(within)f Fq(a)g Fv(only)g(#PCD)m(A)-9
+b(T)h(A)20 b(is)h(allo)n(wed,)e(so)i(the)f(contents)f(can)h(be)g(con)m
+(v)o(erted)e(directly)h(by)h(applying)396 4755 y Fq(escape_html)f
+Fv(to)i(the)f(character)f(data)h(contents.)p Black 3800
+5278 a Fr(41)p Black eop
+%%Page: 42 42
+42 41 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fq(class)44 b(a)h(=)486 676 y(object)f(\(self\))576
+773 y(inherit)f(shared)576 967 y(method)g(to_html)h(store)g(ch)h(=)665
+1065 y(output_string)e(ch)h(")p Fo(<)p Fq(a)h(";)665
+1162 y(let)g(href)f(=)396 1259 y(match)g(self)g(#)h(node)f(#)h
+(attribute)e("href")h(with)576 1356 y(Value)g(v)g(-)p
+Fo(>)h Fq(escape_html)e(v)486 1453 y(|)i(Valuelist)e(_)i(-)p
+Fo(>)f Fq(assert)g(false)486 1550 y(|)h(Implied_value)d(-)p
+Fo(>)665 1647 y Fq(begin)i(match)g(self)g(#)h(node)f(#)h(attribute)e
+("readmeref")g(with)486 1745 y(Value)h(v)h(-)p Fo(>)f
+Fq(escape_html)f(v)i(^)f(".html")396 1842 y(|)h(Valuelist)e(_)i(-)p
+Fo(>)f Fq(assert)g(false)396 1939 y(|)h(Implied_value)e(-)p
+Fo(>)576 2036 y Fq("")665 2133 y(end)665 2230 y(in)665
+2327 y(if)i(href)f Fo(<>)g Fq("")h(then)396 2424 y(output_string)e(ch)h
+(\("href=\\"")88 b(^)45 b(href)f(^)h("\\""\);)665 2522
+y(output_string)e(ch)h(")p Fo(>)p Fq(";)665 2619 y(output_string)f(ch)h
+(\(escape_html)f(\(self)h(#)h(node)f(#)h(data\)\);)665
+2716 y(output_string)e(ch)h(")p Fo(<)p Fq(/a)p Fo(>)p
+Fq(";)486 2910 y(end)396 3007 y(;;)-2 3460 y Fp(2.4.13.)36
+b(Class)f Fc(footnote)396 3628 y Fv(The)20 b Fq(footnote)g
+Fv(class)h(has)f(tw)o(o)h(methods:)e Fq(to_html)g Fv(to)i(con)m(v)o
+(ert)d(the)i(footnote)f(reference)f(to)i(HTML,)g(and)396
+3736 y Fq(footnote_to_html)e Fv(to)j(con)m(v)o(ert)d(the)i(footnote)f
+(te)o(xt)h(itself.)396 3885 y(The)g(footnote)f(reference)f(is)j(con)m
+(v)o(erted)d(to)i(a)h(local)f(hyperlink;)e(more)h(precisely)-5
+b(,)19 b(to)h(tw)o(o)h(anchor)d(tags)j(which)e(are)396
+3993 y(connected)g(with)h(each)g(other)-5 b(.)19 b(The)h(te)o(xt)g
+(anchor)f(points)h(to)g(the)g(footnote)f(anchor)m(,)f(and)h(the)i
+(footnote)d(anchor)h(points)396 4101 y(to)i(the)f(te)o(xt)g(anchor)-5
+b(.)396 4250 y(The)20 b(footnote)f(must)h(be)g(allocated)f(in)i(the)f
+Fq(store)g Fv(object.)f(By)i(allocating)e(the)h(footnote,)f(you)g(get)h
+(the)g(number)f(of)396 4358 y(the)h(footnote,)f(and)g(the)i(te)o(xt)f
+(of)f(the)i(footnote)d(is)j(stored)f(until)g(the)g(end)g(of)g(the)g
+(HTML)g(page)f(is)j(reached)c(when)i(the)396 4466 y(footnotes)f(can)h
+(be)g(printed.)f(The)h Fq(to_html)f Fv(method)g(stores)i(simply)e(the)i
+(object)e(itself,)i(such)f(that)g(the)396 4574 y Fq(footnote_to_html)e
+Fv(method)h(is)i(in)m(v)n(ok)o(ed)e(on)g(the)i(same)f(object)g(that)g
+(encountered)d(the)k(footnote.)p Black 3800 5278 a Fr(42)p
+Black eop
+%%Page: 43 43
+43 42 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black 396 579 a Fv(The)g Fq(to_html)g Fv(only)f(allocates)h(the)h
+(footnote,)d(and)h(prints)h(the)g(reference)f(anchor)m(,)f(b)n(ut)i(it)
+h(does)f(not)g(print)g(nor)396 687 y(con)m(v)o(ert)e(the)j(contents)e
+(of)h(the)g(note.)g(This)g(is)h(deferred)d(until)j(the)f(footnotes)e
+(actually)i(get)g(printed,)f(i.e.)h(the)g(recursi)n(v)o(e)396
+795 y(call)h(of)f Fq(to_html)f Fv(on)h(the)g(sub)g(nodes)g(is)h(done)e
+(by)h Fq(footnote_to_html)p Fv(.)396 944 y(Note)g(that)h(this)f
+(technique)f(does)h(not)g(w)o(ork)f(if)i(you)e(mak)o(e)h(another)f
+(footnote)f(within)i(a)h(footnote;)d(the)i(second)396
+1052 y(footnote)f(gets)h(allocated)g(b)n(ut)g(not)g(printed.)396
+1274 y Fq(class)44 b(footnote)g(=)486 1371 y(object)g(\(self\))576
+1468 y(inherit)f(shared)576 1662 y(val)h(mutable)g(footnote_number)e(=)
+j(0)576 1857 y(method)e(to_html)h(store)g(ch)h(=)665
+1954 y(let)g(number)e(=)396 2051 y(store)h(#)h(alloc_footnote)d(\(self)
+i(:)h(#shared)f(:)p Fo(>)g Fq(footnote_printer\))e(in)665
+2148 y(let)j(foot_anchor)e(=)396 2245 y("footnote")g(^)i(string_of_int)
+e(number)h(in)665 2342 y(let)h(text_anchor)e(=)396 2439
+y("textnote")g(^)i(string_of_int)e(number)h(in)665 2537
+y(footnote_number)f Fo(<)p Fq(-)h(number;)665 2634 y(output_string)f
+(ch)h(\()h(")p Fo(<)p Fq(a)f(name=\\"")g(^)g(text_anchor)f(^)i("\\")f
+(href=\\"#")g(^)441 2731 y(foot_anchor)f(^)i("\\")p Fo(>)p
+Fq([")e(^)i(string_of_int)e(number)h(^)441 2828 y("])p
+Fo(<)p Fq(/a)p Fo(>)p Fq(")g(\))576 3022 y(method)f(footnote_to_html)g
+(store)h(ch)g(=)665 3119 y(\(*)h(prerequisite:)d(we)j(are)f(in)h(a)f
+(definition)g(list)g Fo(<)p Fq(dl)p Fo(>)p Fq(...)p Fo(<)p
+Fq(/dl)p Fo(>)e Fq(*\))665 3217 y(let)j(foot_anchor)e(=)396
+3314 y("footnote")g(^)i(string_of_int)e(footnote_number)f(in)665
+3411 y(let)j(text_anchor)e(=)396 3508 y("textnote")g(^)i(string_of_int)
+e(footnote_number)f(in)665 3605 y(output_string)h(ch)h(\(")p
+Fo(<)p Fq(dt)p Fo(><)p Fq(a)g(name=\\"")f(^)i(foot_anchor)e(^)h("\\")h
+(href=\\"#")e(^)396 3702 y(text_anchor)g(^)i("\\")p Fo(>)p
+Fq([")f(^)g(string_of_int)f(footnote_number)f(^)396 3799
+y("])p Fo(<)p Fq(/a)p Fo(><)p Fq(/dt)p Fo(>)p Fq(\\n)p
+Fo(<)p Fq(dd)p Fo(>)p Fq("\);)665 3896 y(List.iter)396
+3994 y(\(fun)i(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e
+(store)h(ch\))396 4091 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665
+4188 y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/dd)p
+Fo(>)p Fq("\))486 4382 y(end)396 4479 y(;;)p Black 3800
+5278 a Fr(43)p Black eop
+%%Page: 44 44
+44 43 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black -2 583 a Fp(2.4.14.)36 b(The)d(speci\002cation)j(of)e(the)f
+(document)i(model)396 751 y Fv(This)21 b(code)e(sets)i(up)f(the)g(hash)
+g(table)g(that)h(connects)e(element)h(types)g(with)g(the)g(e)o(x)o
+(emplars)f(of)h(the)g(e)o(xtension)f(classes)396 859
+y(that)i(con)m(v)o(ert)d(the)i(elements)g(to)g(HTML.)396
+1039 y Fq(open)44 b(Pxp_yacc)396 1233 y(let)h(tag_map)e(=)486
+1330 y(make_spec_from_alist)576 1427 y(~data_exemplar:\(new)e
+(data_impl)j(\(new)g(only_data\)\))576 1525 y
+(~default_element_exemplar:\(new)39 b(element_impl)k(\(new)h
+(no_markup\)\))576 1622 y(~element_alist:)665 1719 y([)h("readme",)e
+(\(new)h(element_impl)f(\(new)h(readme\)\);)396 1816
+y("sect1",)89 b(\(new)44 b(element_impl)f(\(new)h(sect1\)\);)396
+1913 y("sect2",)89 b(\(new)44 b(element_impl)f(\(new)h(sect2\)\);)396
+2010 y("sect3",)89 b(\(new)44 b(element_impl)f(\(new)h(sect3\)\);)396
+2107 y("title",)89 b(\(new)44 b(element_impl)f(\(new)h(no_markup\)\);)
+396 2205 y("p",)269 b(\(new)44 b(element_impl)f(\(new)h(p\)\);)396
+2302 y("br",)224 b(\(new)44 b(element_impl)f(\(new)h(br\)\);)396
+2399 y("code",)134 b(\(new)44 b(element_impl)f(\(new)h(code\)\);)396
+2496 y("em",)224 b(\(new)44 b(element_impl)f(\(new)h(em\)\);)396
+2593 y("ul",)224 b(\(new)44 b(element_impl)f(\(new)h(ul\)\);)396
+2690 y("li",)224 b(\(new)44 b(element_impl)f(\(new)h(li\)\);)396
+2787 y("footnote",)f(\(new)h(element_impl)f(\(new)h(footnote)g(:)h
+(#shared)e(:)p Fo(>)i Fq(shared\)\);)396 2884 y("a",)269
+b(\(new)44 b(element_impl)f(\(new)h(a\)\);)665 2982 y(])576
+3079 y(\(\))396 3176 y(;;)-2 3678 y Fx(Notes)p Black
+396 3857 a Fv(1.)p Black 70 w(Elements)20 b(may)g(also)g(contain)f
+(processing)g(instructions.)g(Unlik)o(e)h(other)f(document)g(models,)g
+(PXP)i(separates)529 3965 y(processing)e(instructions)g(from)g(the)i
+(rest)f(of)g(the)g(te)o(xt)g(and)g(pro)o(vides)e(a)j(second)e(interf)o
+(ace)h(to)g(access)h(them)529 4073 y(\(method)e Fq(pinstr)p
+Fv(\).)g(Ho)n(we)n(v)o(er)m(,)f(there)h(is)j(a)e(parser)g(option)f(\()p
+Fq(enable_pinstr_nodes)p Fv(\))e(which)i(changes)g(the)529
+4181 y(beha)n(viour)f(of)i(the)g(parser)g(such)g(that)g(e)o(xtra)g
+(nodes)f(for)h(processing)e(instructions)i(are)g(included)e(into)i(the)
+h(tree.)529 4320 y Fi(Furthermore,)e(the)g(tree)g(does)g(normally)h
+(not)f(contain)h(nodes)g(for)e(XML)h(comments;)h(the)o(y)f(are)g
+(ignored)h(by)f(def)o(ault.)g(Again,)529 4417 y(there)g(is)g(an)g
+(option)h(\()p Fh(enable_comment_nodes)p Fi(\))25 b(changing)c(this.)p
+Black 396 4566 a Fv(2.)p Black 70 w(Due)f(to)h(the)f(typing)f(system)h
+(it)h(is)g(more)e(or)h(less)i(impossible)d(to)i(deri)n(v)o(e)d(recursi)
+n(v)o(e)h(classes)i(in)g(O'Caml.)f(T)-7 b(o)20 b(get)529
+4674 y(around)e(this,)j(it)g(is)g(common)d(practice)i(to)g(put)g(the)g
+(modi\002able)f(or)h(e)o(xtensible)f(part)h(of)g(recursi)n(v)o(e)f
+(objects)h(into)529 4782 y(parallel)g(objects.)p Black
+3800 5278 a Fr(44)p Black eop
+%%Page: 45 45
+45 44 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p
+Black Black 396 579 a Fv(3.)p Black 70 w(The)g(problem)e(is)k(that)e
+(the)g(subclass)h(is)g(usually)e(not)h(a)h(subtype)e(in)h(this)h(case)f
+(because)g(O'Caml)g(has)h(a)529 687 y(contra)n(v)n(ariant)d(subtyping)g
+(rule.)p Black 3800 5278 a Fr(45)p Black eop
+%%Page: 46 46
+46 45 bop Black Black -2 621 a Fs(Chapter)48 b(3.)f(The)h(objects)g
+(representing)g(the)-2 845 y(document)396 1093 y Fr(This)21
+b(description)e(might)h(be)g(out-of-date)o(.)e(See)i(the)g(module)f
+(interface)h(\002les)g(for)h(updated)d(information.)-2
+1470 y Fx(3.1.)39 b(The)g Fb(document)44 b Fx(c)m(lass)396
+1722 y Fq(class)g([)h('ext)f(])h(document)e(:)486 1819
+y(Pxp_types.collect_warnings)d(->)486 1916 y(object)576
+2013 y(method)j(init_xml_version)g(:)h(string)g(->)h(unit)576
+2111 y(method)e(init_root)h(:)g('ext)h(node)f(->)g(unit)576
+2305 y(method)f(xml_version)g(:)i(string)576 2402 y(method)e
+(xml_standalone)g(:)i(bool)576 2499 y(method)e(dtd)i(:)f(dtd)576
+2596 y(method)f(root)i(:)f('ext)g(node)576 2791 y(method)f(encoding)h
+(:)h(Pxp_types.rep_encoding)576 2985 y(method)e(add_pinstr)h(:)g
+(proc_instruction)e(->)j(unit)576 3082 y(method)e(pinstr)h(:)h(string)f
+(->)g(proc_instruction)e(list)576 3179 y(method)h(pinstr_names)g(:)i
+(string)f(list)576 3373 y(method)f(write)h(:)h(Pxp_types.output_stream)
+c(->)k(Pxp_types.encoding)c(->)k(unit)486 3568 y(end)396
+3665 y(;;)396 3856 y Fv(The)20 b(methods)f(be)o(ginning)f(with)i
+Fq(init_)g Fv(are)g(only)g(for)f(internal)h(use)g(of)g(the)g(parser)-5
+b(.)p Black 396 4088 a Ft(\225)p Black 60 w Fq(xml_version)p
+Fv(:)19 b(returns)h(the)g(v)o(ersion)f(string)h(at)g(the)g(be)o
+(ginning)e(of)i(the)g(document.)e(F)o(or)i(e)o(xample,)f("1.0")g(is)479
+4196 y(returned)g(if)h(the)g(document)f(be)o(gins)g(with)h
+Fo(<)p Fq(?xml)44 b(version="1.0"?)p Fo(>)p Fv(.)p Black
+396 4304 a Ft(\225)p Black 60 w Fq(xml_standalone)p Fv(:)19
+b(returns)g(the)h(boolean)f(v)n(alue)g(of)h Fq(standalone)f
+Fv(declaration)g(in)h(the)h(XML)f(declaration.)e(If)479
+4412 y(the)i Fq(standalone)g Fv(attrib)n(ute)f(is)i(missing,)f
+Fq(false)g Fv(is)h(returned.)p Black 396 4520 a Ft(\225)p
+Black 60 w Fq(dtd)p Fv(:)g(returns)e(a)i(reference)d(to)i(the)h(global)
+e(DTD)h(object.)p Black 396 4628 a Ft(\225)p Black 60
+w Fq(root)p Fv(:)g(returns)g(a)g(reference)f(to)h(the)g(root)g
+(element.)p Black 396 4736 a Ft(\225)p Black 60 w Fq(encoding)p
+Fv(:)g(returns)f(the)h(internal)g(encoding)e(of)i(the)g(document.)e
+(This)i(means)g(that)g(all)h(strings)f(of)g(which)g(the)479
+4844 y(document)e(consists)j(are)f(encoded)f(in)h(this)h(character)e
+(set.)p Black 3798 5278 a Fr(46)p Black eop
+%%Page: 47 47
+47 46 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(pinstr)p Fv(:)g(returns)f(the)i(processing)d
+(instructions)i(outside)f(the)h(DTD)h(and)e(outside)h(the)g(root)g
+(element.)f(The)479 687 y(ar)o(gument)f(passed)i(to)h(the)f(method)f
+(names)g(a)i Fr(tar)m(g)o(et)q Fv(,)g(and)e(the)h(method)f(returns)g
+(all)i(instructions)e(with)i(this)g(tar)o(get.)479 795
+y(The)f(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(inside)h
+Fo(<)p Fq(?)h Fv(and)e Fq(?)p Fo(>)p Fv(.)p Black 396
+903 a Ft(\225)p Black 60 w Fq(pinstr_names)p Fv(:)g(returns)g(the)i
+(names)e(of)h(the)h(processing)d(instructions)p Black
+396 1011 a Ft(\225)p Black 60 w Fq(add_pinstr)p Fv(:)h(adds)h(another)f
+(processing)g(instruction.)f(This)j(method)e(is)i(used)f(by)f(the)h
+(parser)g(itself)h(to)f(enter)g(the)479 1119 y(instructions)f(returned)
+g(by)h Fq(pinstr)p Fv(,)f(b)n(ut)h(you)g(can)g(also)g(enter)g
+(additional)f(instructions.)p Black 396 1226 a Ft(\225)p
+Black 60 w Fq(write)p Fv(:)h(writes)h(the)f(document)e(to)j(the)f
+(passed)g(stream)g(as)h(XML)f(te)o(xt)g(using)g(the)g(passed)g(\(e)o
+(xternal\))e(encoding.)479 1334 y(The)i(generated)f(te)o(xt)h(is)h(al)o
+(w)o(ays)f(v)n(alid)g(XML)g(and)g(can)g(be)g(parsed)g(by)f(PXP;)i(ho)n
+(we)n(v)o(er)m(,)d(the)i(te)o(xt)g(is)h(badly)479 1442
+y(formatted)e(\(this)h(is)h(not)f(a)h(pretty)e(printer\).)-2
+1861 y Fx(3.2.)39 b(The)g(c)m(lass)g(type)g Fb(node)396
+2041 y Fv(From)20 b Fq(Pxp_document)p Fv(:)396 2221 y
+Fq(type)44 b(node_type)g(=)486 2318 y(T_data)396 2415
+y(|)h(T_element)e(of)i(string)396 2512 y(|)g(T_super_root)396
+2609 y(|)g(T_pinstr)e(of)i(string)396 2706 y(|)g(T_comment)396
+2804 y Fn(and)g(some)f(other,)g(reserved)f(types)396
+2901 y Fq(;;)396 3095 y(class)h(type)g([)h('ext)f(])h(node)f(=)486
+3192 y(object)g(\('self\))576 3289 y(constraint)f('ext)h(=)h('ext)f
+(node)g(#extension)576 3484 y(\(*)g Fn(General)g(observers)f
+Fq(*\))576 3678 y(method)g(extension)h(:)g('ext)576 3775
+y(method)f(dtd)i(:)f(dtd)576 3872 y(method)f(parent)h(:)h('ext)f(node)
+576 3969 y(method)f(root)i(:)f('ext)g(node)576 4066 y(method)f
+(sub_nodes)h(:)g('ext)h(node)f(list)576 4164 y(method)f(iter_nodes)h(:)
+g(\('ext)g(node)g(-)p Fo(>)h Fq(unit\))f(-)p Fo(>)g Fq(unit)576
+4261 y(method)f(iter_nodes_sibl)g(:)889 4358 y(\('ext)h(node)h(option)e
+(-)p Fo(>)i Fq('ext)f(node)g(-)p Fo(>)g Fq('ext)h(node)f(option)g(-)p
+Fo(>)g Fq(unit\))g(-)396 4455 y Fo(>)h Fq(unit)576 4552
+y(method)e(node_type)h(:)g(node_type)576 4649 y(method)f(encoding)h(:)h
+(Pxp_types.rep_encoding)576 4746 y(method)e(data)i(:)f(string)576
+4843 y(method)f(position)h(:)h(\(string)e(*)i(int)f(*)h(int\))p
+Black 3797 5278 a Fr(47)p Black eop
+%%Page: 48 48
+48 47 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 576 579 a Fq(method)43
+b(comment)h(:)h(string)f(option)576 676 y(method)f(pinstr)h(:)h(string)
+f(-)p Fo(>)g Fq(proc_instruction)e(list)576 773 y(method)h
+(pinstr_names)g(:)i(string)f(list)576 870 y(method)f(write)h(:)h
+(Pxp_types.output_stream)c(->)k(Pxp_types.encoding)c(->)k(unit)576
+1065 y(\(*)f Fn(Attribute)f(observers)h Fq(*\))576 1259
+y(method)f(attribute)h(:)g(string)g(-)p Fo(>)h Fq(Pxp_types.att_value)
+576 1356 y(method)e(required_string_attribute)e(:)k(string)f(-)p
+Fo(>)g Fq(string)576 1453 y(method)f(optional_string_attribute)e(:)k
+(string)f(-)p Fo(>)g Fq(string)g(option)576 1550 y(method)f
+(required_list_attribute)e(:)k(string)f(-)p Fo(>)g Fq(string)g(list)576
+1647 y(method)f(optional_list_attribute)e(:)k(string)f(-)p
+Fo(>)g Fq(string)g(list)576 1745 y(method)f(attribute_names)g(:)h
+(string)g(list)576 1842 y(method)f(attribute_type)g(:)i(string)e(-)p
+Fo(>)i Fq(Pxp_types.att_type)576 1939 y(method)e(attributes)h(:)g
+(\(string)g(*)h(Pxp_types.att_value\))c(list)576 2036
+y(method)i(id_attribute_name)f(:)j(string)576 2133 y(method)e
+(id_attribute_value)f(:)j(string)576 2230 y(method)e
+(idref_attribute_names)f(:)i(string)576 2424 y(\(*)g
+Fn(Modifying)f(methods)h Fq(*\))576 2619 y(method)f(add_node)h(:)h
+(?force:bool)e(-)p Fo(>)h Fq('ext)g(node)g(-)p Fo(>)h
+Fq(unit)576 2716 y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p
+Fo(>)j Fq(unit)576 2813 y(method)e(delete)h(:)h(unit)576
+2910 y(method)e(set_nodes)h(:)g('ext)h(node)f(list)g(-)p
+Fo(>)g Fq(unit)576 3007 y(method)f(quick_set_attributes)f(:)j(\(string)
+e(*)i(Pxp_types.att_value\))c(list)j(-)p Fo(>)h Fq(unit)576
+3104 y(method)e(set_comment)g(:)i(string)f(option)g(-)p
+Fo(>)g Fq(unit)576 3299 y(\(*)g Fn(Cloning)g(methods)f
+Fq(*\))576 3493 y(method)g(orphaned_clone)g(:)i('self)576
+3590 y(method)e(orphaned_flat_clone)f(:)j('self)576 3687
+y(method)e(create_element)g(:)1024 3784 y(?position:\(string)f(*)j(int)
+f(*)h(int\))f(-)p Fo(>)1024 3882 y Fq(dtd)g(-)p Fo(>)h
+Fq(node_type)e(-)p Fo(>)h Fq(\(string)g(*)h(string\))e(list)h(-)p
+Fo(>)1203 3979 y Fq('ext)g(node)576 4076 y(method)f(create_data)g(:)i
+(dtd)f(-)p Fo(>)h Fq(string)f(-)p Fo(>)g Fq('ext)g(node)576
+4173 y(method)f(keep_always_whitespace_mode)e(:)j(unit)576
+4367 y(\(*)g Fn(Validating)f(methods)h Fq(*\))576 4561
+y(method)f(local_validate)g(:)i(?use_dfa:bool)d(->)j(unit)f(->)g(unit)
+576 4756 y(\(*)g(...)g(Internal)g(methods)g(are)g(undocumented.)f(*\))p
+Black 3800 5278 a Fr(48)p Black eop
+%%Page: 49 49
+49 48 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(end)396
+676 y(;;)396 867 y Fv(In)g(the)g(module)f Fq(Pxp_types)g
+Fv(you)h(can)g(\002nd)g(another)e(type)i(de\002nition)f(that)h(is)i
+(important)c(in)j(this)f(conte)o(xt:)396 1047 y Fq(type)44
+b(Pxp_types.att_value)e(=)576 1144 y(Value)223 b(of)44
+b(string)486 1241 y(|)h(Valuelist)e(of)h(string)g(list)486
+1339 y(|)h(Implied_value)396 1436 y(;;)-2 1847 y Fp(3.2.1.)35
+b(The)f(structure)f(of)g(document)i(trees)396 2015 y
+Fv(A)21 b(node)e(represents)g(either)h(an)g(element)g(or)g(a)g
+(character)f(data)h(section.)g(There)g(are)g(tw)o(o)g(classes)h
+(implementing)d(the)396 2122 y(tw)o(o)j(aspects)f(of)g(nodes:)g
+Fq(element_impl)e Fv(and)i Fq(data_impl)p Fv(.)f(The)h(latter)g(class)h
+(does)f(not)g(implement)f(all)i(methods)396 2230 y(because)f(some)g
+(methods)f(do)h(not)g(mak)o(e)f(sense)i(for)e(data)h(nodes.)396
+2380 y(\(Note:)g(PXP)h(also)g(supports)e(a)h(mode)g(which)f(forces)h
+(that)g(processing)f(instructions)g(and)h(comments)f(are)396
+2488 y(represented)g(as)i(nodes)e(of)h(the)g(document)e(tree.)i(Ho)n
+(we)n(v)o(er)m(,)e(these)j(nodes)e(are)h(instances)g(of)g
+Fq(element_impl)f Fv(with)396 2596 y(node)g(types)h Fq(T_pinstr)g
+Fv(and)f Fq(T_comment)p Fv(,)g(respecti)n(v)o(ely)-5
+b(.)18 b(This)j(mode)e(must)h(be)g(e)o(xplicitly)g(con\002gured;)d(the)
+k(basic)396 2704 y(representation)d(kno)n(ws)i(only)f(element)h(and)f
+(data)h(nodes.\))396 2853 y(The)g(follo)n(wing)f(\002gure)g(\()p
+Fr(A)h(tr)m(ee)h(with)g(element)f(nodes,)f(data)g(nodes,)h(and)f
+(attrib)n(utes)p Fv(\))h(sho)n(ws)g(an)g(e)o(xample)f(ho)n(w)h(a)396
+2961 y(tree)g(is)i(constructed)c(from)h(element)h(and)f(data)i(nodes.)e
+(The)h(circular)f(areas)h(represent)f(element)h(nodes)f(whereas)h(the)
+396 3069 y(o)o(v)n(als)f(denote)f(data)i(nodes.)e(Only)h(elements)g
+(may)g(ha)n(v)o(e)g(subnodes;)f(data)h(nodes)g(are)g(al)o(w)o(ays)h
+(lea)n(v)o(es)f(of)h(the)f(tree.)g(The)396 3177 y(subnodes)g(of)h(an)g
+(element)g(can)g(be)g(either)g(element)f(or)h(data)g(nodes;)g(in)g
+(both)f(cases)i(the)g(O'Caml)f(objects)g(storing)f(the)396
+3285 y(nodes)h(ha)n(v)o(e)f(the)i(class)g(type)e Fq(node)p
+Fv(.)396 3434 y(Attrib)n(utes)h(\(the)g(clouds)g(in)g(the)g(picture\))f
+(are)h(not)g(directly)g(inte)o(grated)e(into)i(the)g(tree;)h(there)e
+(is)i(al)o(w)o(ays)g(an)f(e)o(xtra)g(link)396 3542 y(to)h(the)f(attrib)
+n(ute)g(list.)h(This)f(is)h(also)g(true)f(for)f(processing)g
+(instructions)g(\(not)h(sho)n(wn)f(in)h(the)h(picture\).)d(This)j
+(means)396 3650 y(that)g(there)e(are)h(separated)g(access)g(methods)g
+(for)f(attrib)n(utes)h(and)g(processing)f(instructions.)p
+Black 3800 5278 a Fr(49)p Black eop
+%%Page: 50 50
+50 49 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-1.)f(A)i
+(tr)o(ee)e(with)i(element)f(nodes,)h(data)e(nodes,)i(and)f(attrib)n
+(utes)396 2578 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 2578 a @beginspecial 0 @llx 0 @lly
+329 @urx 218 @ury 3290 @rwi @setspecial
+%%BeginDocument: pic/node_term.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_term.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 329 218
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-1.0 251.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+/reencdict 12 dict def /ReEncode { reencdict begin
+/newcodesandnames exch def /newfontname exch def /basefontname exch def
+/basefontdict basefontname findfont def /newfont basefontdict maxlength dict def
+basefontdict { exch dup /FID ne { dup /Encoding eq
+{ exch dup length array copy newfont 3 1 roll put }
+{ exch newfont 3 1 roll put } ifelse } { pop pop } ifelse } forall
+newfont /FontName newfontname put newcodesandnames aload pop
+128 1 255 { newfont /Encoding get exch /.notdef put } for
+newcodesandnames length 2 idiv { newfont /Encoding get 3 1 roll put } repeat
+newfontname newfont definefont pop end } def
+/isovec [
+8#200 /grave 8#201 /acute 8#202 /circumflex 8#203 /tilde
+8#204 /macron 8#205 /breve 8#206 /dotaccent 8#207 /dieresis
+8#210 /ring 8#211 /cedilla 8#212 /hungarumlaut 8#213 /ogonek 8#214 /caron
+8#220 /dotlessi 8#230 /oe 8#231 /OE
+8#240 /space 8#241 /exclamdown 8#242 /cent 8#243 /sterling
+8#244 /currency 8#245 /yen 8#246 /brokenbar 8#247 /section 8#250 /dieresis
+8#251 /copyright 8#252 /ordfeminine 8#253 /guillemotleft 8#254 /logicalnot
+8#255 /endash 8#256 /registered 8#257 /macron 8#260 /degree 8#261 /plusminus
+8#262 /twosuperior 8#263 /threesuperior 8#264 /acute 8#265 /mu 8#266 /paragraph
+8#267 /periodcentered 8#270 /cedilla 8#271 /onesuperior 8#272 /ordmasculine
+8#273 /guillemotright 8#274 /onequarter 8#275 /onehalf
+8#276 /threequarters 8#277 /questiondown 8#300 /Agrave 8#301 /Aacute
+8#302 /Acircumflex 8#303 /Atilde 8#304 /Adieresis 8#305 /Aring
+8#306 /AE 8#307 /Ccedilla 8#310 /Egrave 8#311 /Eacute
+8#312 /Ecircumflex 8#313 /Edieresis 8#314 /Igrave 8#315 /Iacute
+8#316 /Icircumflex 8#317 /Idieresis 8#320 /Eth 8#321 /Ntilde 8#322 /Ograve
+8#323 /Oacute 8#324 /Ocircumflex 8#325 /Otilde 8#326 /Odieresis 8#327 /multiply
+8#330 /Oslash 8#331 /Ugrave 8#332 /Uacute 8#333 /Ucircumflex
+8#334 /Udieresis 8#335 /Yacute 8#336 /Thorn 8#337 /germandbls 8#340 /agrave
+8#341 /aacute 8#342 /acircumflex 8#343 /atilde 8#344 /adieresis 8#345 /aring
+8#346 /ae 8#347 /ccedilla 8#350 /egrave 8#351 /eacute
+8#352 /ecircumflex 8#353 /edieresis 8#354 /igrave 8#355 /iacute
+8#356 /icircumflex 8#357 /idieresis 8#360 /eth 8#361 /ntilde 8#362 /ograve
+8#363 /oacute 8#364 /ocircumflex 8#365 /otilde 8#366 /odieresis 8#367 /divide
+8#370 /oslash 8#371 /ugrave 8#372 /uacute 8#373 /ucircumflex
+8#374 /udieresis 8#375 /yacute 8#376 /thorn 8#377 /ydieresis] def
+/Helvetica-Bold /Helvetica-Bold-iso isovec ReEncode
+/Helvetica /Helvetica-iso isovec ReEncode
+/Helvetica-Oblique /Helvetica-Oblique-iso isovec ReEncode
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5962 m -1000 -1000 l 7537 -1000 l 7537 5962 l cp clip
+ 0.05039 0.05039 sc
+% Polyline
+7.500 slw
+n 1770 2700 m 1665 2700 1665 3045 105 arcto 4 {pop} repeat
+ 1665 3150 2730 3150 105 arcto 4 {pop} repeat
+ 2835 3150 2835 2805 105 arcto 4 {pop} repeat
+ 2835 2700 1770 2700 105 arcto 4 {pop} repeat
+ cp gs col7 0.75 shd ef gr gs col0 s gr
+% Ellipse
+n 2250 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1575 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2925 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 900 2925 242 242 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+n 420 3825 m 315 3825 315 4170 105 arcto 4 {pop} repeat
+ 315 4275 1380 4275 105 arcto 4 {pop} repeat
+ 1485 4275 1485 3930 105 arcto 4 {pop} repeat
+ 1485 3825 420 3825 105 arcto 4 {pop} repeat
+ cp gs col7 0.75 shd ef gr gs col0 s gr
+% Polyline
+n 2085 1275 m 1582 1807 l gs col0 s gr
+% Polyline
+n 2407 1297 m 2940 1800 l gs col0 s gr
+% Polyline
+n 1417 2190 m 900 2692 l gs col0 s gr
+% Polyline
+n 1740 2190 m 2257 2700 l gs col0 s gr
+% Polyline
+n 892 3180 m 892 3825 l gs col0 s gr
+% Polyline
+n 45 675 m 6525 675 l 6525 4950 l 45 4950 l cp gs col0 s gr
+% Polyline
+n 2250 3600 m 2263 3597 l 2277 3594 l 2293 3592 l 2309 3589 l 2326 3586 l
+ 2344 3583 l 2362 3580 l 2381 3578 l 2399 3575 l 2418 3572 l
+ 2436 3569 l 2454 3566 l 2471 3563 l 2488 3561 l 2504 3558 l
+ 2520 3555 l 2537 3552 l 2555 3548 l 2571 3545 l 2588 3541 l
+ 2604 3537 l 2621 3533 l 2637 3528 l 2653 3524 l 2669 3520 l
+ 2684 3517 l 2700 3514 l 2715 3512 l 2730 3510 l 2745 3510 l
+ 2762 3511 l 2777 3512 l 2793 3514 l 2807 3517 l 2821 3520 l
+ 2835 3524 l 2849 3528 l 2863 3532 l 2877 3537 l 2893 3542 l
+ 2908 3548 l 2925 3555 l 2938 3561 l 2951 3568 l 2965 3575 l
+ 2978 3584 l 2992 3593 l 3007 3602 l 3021 3612 l 3035 3623 l
+ 3050 3633 l 3064 3643 l 3079 3652 l 3093 3661 l 3108 3670 l
+ 3122 3677 l 3136 3684 l 3150 3690 l 3166 3696 l 3182 3701 l
+ 3198 3706 l 3214 3710 l 3230 3713 l 3246 3716 l 3263 3719 l
+ 3279 3721 l 3295 3724 l 3311 3726 l 3327 3729 l 3343 3731 l
+ 3359 3733 l 3375 3735 l 3391 3736 l 3407 3737 l 3423 3738 l
+ 3439 3738 l 3455 3738 l 3471 3738 l 3488 3737 l 3504 3737 l
+ 3520 3736 l 3536 3736 l 3552 3735 l 3568 3735 l 3584 3735 l
+ 3600 3735 l 3616 3735 l 3632 3735 l 3648 3734 l 3663 3734 l
+ 3678 3733 l 3693 3732 l 3708 3731 l 3723 3730 l 3739 3729 l
+ 3755 3729 l 3771 3729 l 3788 3730 l 3806 3732 l 3825 3735 l
+ 3840 3738 l 3856 3741 l 3874 3745 l 3892 3749 l 3911 3753 l
+ 3931 3757 l 3951 3762 l 3972 3767 l 3993 3772 l 4014 3777 l
+ 4034 3782 l 4054 3787 l 4072 3793 l 4089 3799 l 4105 3805 l
+ 4119 3811 l 4130 3818 l 4140 3825 l 4150 3835 l 4157 3846 l
+ 4161 3858 l 4163 3870 l 4164 3883 l 4163 3897 l 4161 3911 l
+ 4159 3925 l 4156 3939 l 4154 3952 l 4151 3966 l 4148 3979 l
+ 4144 3992 l 4140 4005 l 4135 4018 l 4128 4031 l 4121 4045 l
+ 4112 4058 l 4104 4073 l 4095 4087 l 4085 4101 l 4075 4116 l
+ 4065 4129 l 4055 4143 l 4043 4155 l 4032 4166 l 4019 4176 l
+ 4005 4185 l 3992 4192 l 3978 4197 l 3963 4202 l 3947 4206 l
+ 3930 4210 l 3913 4213 l 3896 4216 l 3878 4218 l 3861 4220 l
+ 3843 4222 l 3825 4224 l 3807 4226 l 3789 4228 l 3771 4229 l
+ 3753 4230 l 3735 4230 l 3717 4230 l 3698 4228 l 3678 4226 l
+ 3659 4224 l 3639 4220 l 3619 4216 l 3598 4212 l 3578 4208 l
+ 3557 4203 l 3536 4199 l 3516 4195 l 3496 4191 l 3477 4189 l
+ 3457 4187 l 3438 4185 l 3420 4185 l 3402 4185 l 3384 4186 l
+ 3367 4188 l 3350 4190 l 3333 4193 l 3317 4196 l 3301 4200 l
+ 3285 4203 l 3269 4207 l 3253 4211 l 3237 4214 l 3220 4218 l
+ 3203 4221 l 3186 4224 l 3168 4227 l 3150 4230 l 3132 4233 l
+ 3113 4236 l 3094 4239 l 3074 4242 l 3055 4246 l 3035 4249 l
+ 3015 4253 l 2995 4257 l 2974 4260 l 2954 4264 l 2934 4267 l
+ 2914 4270 l 2894 4272 l 2874 4274 l 2855 4275 l 2835 4275 l
+ 2815 4275 l 2795 4274 l 2775 4272 l 2755 4270 l 2734 4268 l
+ 2713 4265 l 2692 4262 l 2671 4259 l 2650 4256 l 2630 4252 l
+ 2609 4249 l 2590 4245 l 2571 4242 l 2553 4238 l 2536 4234 l
+ 2520 4230 l 2503 4225 l 2487 4219 l 2473 4213 l 2460 4207 l
+ 2448 4200 l 2437 4192 l 2426 4185 l 2415 4178 l 2404 4170 l
+ 2393 4163 l 2380 4157 l 2368 4151 l 2354 4145 l 2340 4140 l
+ 2325 4135 l 2310 4131 l 2294 4128 l 2277 4125 l 2260 4122 l
+ 2243 4120 l 2225 4118 l 2208 4115 l 2191 4113 l 2174 4110 l
+ 2158 4107 l 2143 4104 l 2128 4100 l 2115 4095 l 2101 4089 l
+ 2087 4083 l 2074 4076 l 2061 4070 l 2049 4063 l 2037 4056 l
+ 2025 4049 l 2014 4042 l 2004 4034 l 1995 4025 l 1987 4016 l
+ 1980 4005 l 1975 3993 l 1972 3980 l 1971 3965 l 1970 3949 l
+ 1971 3932 l 1972 3915 l 1973 3898 l 1974 3881 l 1976 3865 l
+ 1977 3850 l 1978 3837 l 1980 3825 l 1983 3812 l 1986 3801 l
+ 1990 3792 l 1994 3784 l 1998 3776 l 2003 3768 l 2008 3761 l
+ 2013 3752 l 2019 3744 l 2025 3735 l 2032 3726 l 2040 3717 l
+ 2048 3707 l 2057 3698 l 2066 3688 l 2075 3678 l 2084 3669 l
+ 2094 3660 l 2104 3652 l 2115 3645 l 2127 3639 l 2138 3633 l
+ 2150 3628 l 2162 3624 l 2174 3620 l 2186 3617 l 2200 3613 l
+ 2214 3609 l 2231 3604 l cp gs col0 s gr
+% Polyline
+n 3645 1080 m 3660 1077 l 3677 1074 l 3694 1071 l 3713 1068 l 3733 1065 l
+ 3754 1063 l 3775 1060 l 3798 1058 l 3820 1056 l 3843 1053 l
+ 3866 1051 l 3889 1049 l 3912 1047 l 3934 1045 l 3955 1043 l
+ 3976 1041 l 3996 1039 l 4015 1038 l 4033 1036 l 4050 1035 l
+ 4071 1034 l 4090 1033 l 4109 1032 l 4127 1032 l 4144 1031 l
+ 4161 1031 l 4177 1031 l 4193 1031 l 4209 1031 l 4225 1031 l
+ 4241 1031 l 4257 1032 l 4273 1032 l 4289 1033 l 4304 1034 l
+ 4320 1035 l 4337 1037 l 4354 1039 l 4371 1041 l 4387 1044 l
+ 4403 1047 l 4419 1050 l 4435 1053 l 4450 1057 l 4466 1060 l
+ 4481 1063 l 4497 1067 l 4513 1071 l 4529 1075 l 4545 1080 l
+ 4561 1085 l 4577 1091 l 4592 1097 l 4607 1103 l 4622 1110 l
+ 4637 1118 l 4651 1125 l 4666 1132 l 4681 1140 l 4697 1147 l
+ 4713 1153 l 4731 1159 l 4750 1165 l 4770 1170 l 4787 1174 l
+ 4804 1177 l 4823 1180 l 4842 1182 l 4863 1184 l 4884 1186 l
+ 4906 1188 l 4928 1189 l 4950 1190 l 4972 1192 l 4994 1193 l
+ 5016 1195 l 5037 1197 l 5058 1200 l 5077 1203 l 5096 1206 l
+ 5113 1210 l 5130 1215 l 5148 1221 l 5165 1228 l 5181 1235 l
+ 5197 1242 l 5212 1250 l 5228 1259 l 5243 1267 l 5257 1276 l
+ 5272 1285 l 5286 1294 l 5299 1303 l 5312 1312 l 5324 1322 l
+ 5336 1331 l 5346 1340 l 5355 1350 l 5365 1363 l 5373 1378 l
+ 5380 1392 l 5386 1408 l 5390 1424 l 5394 1440 l 5398 1456 l
+ 5401 1472 l 5402 1488 l 5403 1502 l 5403 1517 l 5400 1530 l
+ 5395 1543 l 5389 1555 l 5381 1568 l 5372 1580 l 5363 1592 l
+ 5354 1604 l 5343 1616 l 5331 1627 l 5318 1638 l 5303 1648 l
+ 5286 1657 l 5265 1665 l 5251 1669 l 5235 1673 l 5219 1677 l
+ 5201 1680 l 5182 1683 l 5162 1685 l 5141 1688 l 5119 1690 l
+ 5097 1692 l 5075 1694 l 5053 1696 l 5030 1697 l 5008 1699 l
+ 4986 1701 l 4964 1703 l 4943 1704 l 4921 1706 l 4901 1707 l
+ 4880 1709 l 4860 1710 l 4840 1711 l 4819 1712 l 4799 1713 l
+ 4779 1713 l 4758 1713 l 4738 1714 l 4717 1714 l 4697 1714 l
+ 4676 1714 l 4655 1714 l 4635 1714 l 4614 1714 l 4594 1714 l
+ 4573 1714 l 4553 1713 l 4533 1713 l 4513 1713 l 4494 1712 l
+ 4474 1711 l 4455 1710 l 4434 1709 l 4413 1707 l 4392 1705 l
+ 4372 1703 l 4351 1701 l 4331 1698 l 4311 1695 l 4291 1692 l
+ 4271 1690 l 4251 1687 l 4231 1684 l 4211 1681 l 4191 1678 l
+ 4172 1675 l 4152 1673 l 4133 1670 l 4114 1668 l 4095 1665 l
+ 4074 1662 l 4053 1659 l 4033 1657 l 4012 1654 l 3992 1651 l
+ 3972 1648 l 3951 1645 l 3931 1643 l 3911 1640 l 3891 1637 l
+ 3872 1634 l 3852 1631 l 3833 1628 l 3815 1626 l 3797 1623 l
+ 3780 1620 l 3761 1617 l 3743 1614 l 3725 1611 l 3708 1608 l
+ 3692 1605 l 3675 1602 l 3659 1600 l 3643 1597 l 3627 1594 l
+ 3612 1591 l 3597 1587 l 3582 1584 l 3568 1580 l 3555 1575 l
+ 3541 1569 l 3527 1563 l 3514 1556 l 3501 1550 l 3489 1543 l
+ 3477 1536 l 3465 1529 l 3454 1522 l 3444 1514 l 3435 1505 l
+ 3427 1496 l 3420 1485 l 3415 1473 l 3412 1460 l 3411 1445 l
+ 3410 1430 l 3411 1414 l 3412 1397 l 3413 1380 l 3414 1364 l
+ 3416 1348 l 3417 1333 l 3418 1318 l 3420 1305 l 3423 1290 l
+ 3425 1275 l 3428 1261 l 3431 1247 l 3434 1233 l 3437 1220 l
+ 3442 1207 l 3447 1194 l 3455 1182 l 3465 1170 l 3474 1162 l
+ 3483 1155 l 3493 1148 l 3504 1141 l 3515 1134 l 3526 1127 l
+ 3538 1121 l 3550 1114 l 3563 1108 l 3577 1102 l 3591 1096 l
+ 3607 1090 l 3625 1085 l cp gs col0 s gr
+% Polyline
+n 2475 1215 m 2477 1217 l 2482 1221 l 2491 1229 l 2503 1239 l 2517 1252 l
+ 2534 1267 l 2552 1282 l 2570 1296 l 2588 1310 l 2605 1322 l
+ 2621 1332 l 2638 1342 l 2655 1350 l 2669 1356 l 2684 1362 l
+ 2700 1368 l 2717 1374 l 2734 1380 l 2752 1386 l 2770 1392 l
+ 2789 1398 l 2808 1403 l 2827 1409 l 2846 1415 l 2865 1420 l
+ 2884 1425 l 2902 1429 l 2920 1433 l 2937 1436 l 2954 1438 l
+ 2970 1440 l 2988 1441 l 3006 1441 l 3024 1440 l 3041 1439 l
+ 3059 1437 l 3076 1434 l 3094 1431 l 3111 1428 l 3129 1425 l
+ 3146 1421 l 3162 1417 l 3179 1414 l 3195 1409 l 3211 1405 l
+ 3226 1400 l 3240 1395 l 3256 1388 l 3271 1380 l 3287 1370 l
+ 3304 1358 l 3322 1344 l 3340 1329 l 3359 1314 l 3376 1299 l
+ 3391 1286 l 3404 1275 l 3412 1267 l 3418 1262 l 3420 1260 l gs col0 s gr
+% Polyline
+n 1125 3060 m 1126 3063 l 1127 3068 l 1129 3078 l 1132 3093 l 1136 3112 l
+ 1141 3135 l 1146 3162 l 1153 3190 l 1159 3219 l 1166 3248 l
+ 1173 3275 l 1180 3301 l 1187 3324 l 1193 3345 l 1200 3364 l
+ 1207 3381 l 1215 3397 l 1224 3414 l 1234 3429 l 1245 3444 l
+ 1256 3459 l 1267 3473 l 1279 3486 l 1291 3499 l 1304 3512 l
+ 1316 3525 l 1329 3537 l 1342 3550 l 1355 3562 l 1368 3574 l
+ 1382 3585 l 1396 3596 l 1410 3607 l 1425 3617 l 1441 3626 l
+ 1457 3635 l 1473 3644 l 1490 3653 l 1507 3661 l 1524 3669 l
+ 1542 3677 l 1559 3685 l 1577 3692 l 1595 3700 l 1613 3706 l
+ 1631 3713 l 1649 3718 l 1668 3723 l 1687 3727 l 1704 3730 l
+ 1723 3732 l 1743 3733 l 1764 3734 l 1788 3734 l 1814 3733 l
+ 1841 3732 l 1869 3731 l 1898 3729 l 1926 3727 l 1952 3725 l
+ 1975 3724 l 1993 3722 l 2008 3721 l 2017 3721 l 2022 3720 l
+ 2025 3720 l gs col0 s gr
+/Helvetica-iso ff 180.00 scf sf
+3600 1260 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+3600 1485 m
+gs 1 -1 sc ("att" -> Value "apple") col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 3780 m
+gs 1 -1 sc (attributes:) col0 sh gr
+/Helvetica-Oblique-iso ff 180.00 scf sf
+390 4725 m
+gs 1 -1 sc (<a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>) col0 sh gr
+/Helvetica-iso ff 180.00 scf sf
+2250 4005 m
+gs 1 -1 sc ("att" -> Value "orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1815 3015 m
+gs 1 -1 sc ("Cherries") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+375 4125 m
+gs 1 -1 sc ("An orange") col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+750 2985 m
+gs 1 -1 sc (<a>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+1410 2085 m
+gs 1 -1 sc (<b>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2790 2070 m
+gs 1 -1 sc (<c>) col0 sh gr
+/Helvetica-Bold-iso ff 180.00 scf sf
+2100 1200 m
+gs 1 -1 sc (<a>) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 2578 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 2578 a 357 x Fv(Only)g(elements,)g(data)g
+(sections,)g(attrib)n(utes)g(and)g(processing)e(instructions)i(\(and)f
+(comments,)g(if)h(con\002gured\))e(can,)396 3043 y(directly)i(or)g
+(indirectly)-5 b(,)18 b(occur)h(in)h(the)h(document)d(tree.)i(It)g(is)h
+(impossible)f(to)g(add)g(entity)g(references)f(to)h(the)g(tree;)g(if)
+396 3151 y(the)g(parser)g(\002nds)g(such)g(a)h(reference,)d(not)i(the)g
+(reference)f(as)i(such)f(b)n(ut)g(the)g(referenced)e(te)o(xt)i(\(i.e.)g
+(the)g(tree)396 3259 y(representing)e(the)j(structured)d(te)o(xt\))i
+(is)h(included)e(in)h(the)g(tree.)396 3409 y(Note)g(that)h(the)f
+(parser)f(collapses)i(as)g(much)e(data)h(material)g(into)g(one)f(data)h
+(node)f(as)i(possible)f(such)g(that)g(there)g(are)396
+3517 y(normally)f(ne)n(v)o(er)g(tw)o(o)h(adjacent)f(data)i(nodes.)e
+(This)h(in)m(v)n(ariant)f(is)i(enforced)d(e)n(v)o(en)h(if)i(data)f
+(material)f(is)j(included)c(by)396 3625 y(entity)i(references)f(or)h
+(CD)m(A)-9 b(T)h(A)20 b(sections,)g(or)g(if)h(a)f(data)g(sequence)f(is)
+j(interrupted)c(by)h(comments.)g(So)i Fq(a)44 b(&)g(b)396
+3732 y Fo(<)p Fq(-)h(comment)e(-)p Fo(>)i Fq(c)f Fo(<)p
+Fq(![CDATA[)g Fo(<>)g Fq(d]])p Fo(>)20 b Fv(is)h(represented)d(by)i
+(only)g(one)f(data)h(node,)f(for)h(instance.)396 3840
+y(Ho)n(we)n(v)o(er)m(,)e(you)i(can)g(create)g(document)e(trees)i
+(manually)f(which)h(break)f(this)i(in)m(v)n(ariant;)d(it)j(is)g(only)f
+(the)g(w)o(ay)g(the)396 3948 y(parser)g(forms)f(the)h(tree.)p
+Black 3800 5278 a Fr(50)p Black eop
+%%Page: 51 51
+51 50 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-2.)f
+(Nodes)h(ar)o(e)g(doubly)g(link)o(ed)i(tr)o(ees)396 1537
+y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1537 a @beginspecial 0 @llx 0 @lly 138 @urx 93
+@ury 1380 @rwi @setspecial
+%%BeginDocument: pic/node_general.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_general.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 138 93
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-22.0 126.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 3487 m -1000 -1000 l 4162 -1000 l 4162 3487 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2025 2025 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1350 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2700 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+1743 1345 m 1845 1275 l 1788 1385 l 1877 1284 l 1832 1244 l cp
+clip
+n 1380 1800 m 1845 1275 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1743 1345 m 1845 1275 l 1788 1385 l 1765 1365 l 1743 1345 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1384 1745 m 1282 1815 l 1339 1705 l 1250 1807 l 1295 1846 l cp
+clip
+n 1815 1207 m 1282 1815 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1384 1745 m 1282 1815 l 1339 1705 l 1361 1725 l 1384 1745 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2025 1470 m 2055 1350 l 2085 1470 l 2085 1335 l 2025 1335 l cp
+clip
+n 2055 1792 m 2055 1350 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2025 1470 m 2055 1350 l 2085 1470 l 2055 1470 l 2025 1470 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2010 1687 m 1980 1807 l 1950 1687 l 1950 1822 l 2010 1822 l cp
+clip
+n 1980 1350 m 1980 1807 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2010 1687 m 1980 1807 l 1950 1687 l 1980 1687 l 2010 1687 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2511 1750 m 2550 1867 l 2461 1782 l 2533 1896 l 2583 1864 l cp
+clip
+n 2190 1297 m 2550 1867 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2511 1750 m 2550 1867 l 2461 1782 l 2486 1766 l 2511 1750 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2262 1353 m 2220 1237 l 2312 1320 l 2237 1208 l 2187 1241 l cp
+clip
+n 2602 1807 m 2220 1237 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2262 1353 m 2220 1237 l 2312 1320 l 2287 1337 l 2262 1353 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+n 450 675 m 3150 675 l 3150 2475 l 450 2475 l cp gs col0 s gr
+/Courier ff 150.00 scf sf
+2377 1342 m
+gs 1 -1 sc (parent) col0 sh gr
+/Courier ff 150.00 scf sf
+645 1628 m
+gs 1 -1 sc (sub_nodes) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1537 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1537 a 357 x Fv(The)e(node)f(tree)h(has)h
+(links)f(in)g(both)g(directions:)f(Ev)o(ery)g(node)g(has)h(a)h(link)f
+(to)g(its)i(parent)d(\(if)h(an)o(y\),)f(and)g(it)i(has)g(links)f(to)396
+2002 y(the)g(subnodes)f(\(see)i(\002gure)e Fr(Nodes)h(ar)m(e)h(doubly)d
+(link)o(ed)i(tr)m(ees)p Fv(\).)h(Ob)o(viously)-5 b(,)18
+b(this)i(doubly-link)o(ed)d(structure)396 2110 y(simpli\002es)k(the)f
+(na)n(vigation)e(in)j(the)f(tree;)g(b)n(ut)g(has)h(also)f(some)g
+(consequences)f(for)g(the)h(possible)g(operations)f(on)h(trees.)396
+2259 y(Because)h(e)n(v)o(ery)d(node)i(must)g(ha)n(v)o(e)f(at)i(most)f
+Fr(one)g Fv(parent)f(node,)g(operations)g(are)h(ille)o(gal)g(if)g(the)o
+(y)f(violate)h(this)396 2367 y(condition.)e(The)i(follo)n(wing)f
+(\002gure)g(\()p Fr(A)h(node)g(can)f(only)h(be)g(added)f(if)i(it)g(is)g
+(a)f(r)l(oot)q Fv(\))g(sho)n(ws)h(on)e(the)i(left)f(side)h(that)f(node)
+396 2475 y Fq(y)h Fv(is)g(added)e(to)h Fq(x)h Fv(as)g(ne)n(w)f(subnode)
+e(which)i(is)h(allo)n(wed)f(because)f Fq(y)i Fv(does)f(not)g(ha)n(v)o
+(e)f(a)i(parent)e(yet.)h(The)g(right)f(side)i(of)396
+2583 y(the)f(picture)g(illustrates)g(what)h(w)o(ould)e(happen)g(if)h
+Fq(y)h Fv(had)e(a)i(parent)e(node;)g(this)i(is)g(ille)o(gal)f(because)f
+Fq(y)i Fv(w)o(ould)e(ha)n(v)o(e)h(tw)o(o)396 2691 y(parents)g(after)g
+(the)g(operation.)396 2923 y Fu(Figur)o(e)g(3-3.)f(A)i(node)f(can)g
+(only)g(be)h(added)g(if)f(it)h(is)g(a)f(r)o(oot)396 4165
+y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 4165 a @beginspecial 0 @llx 0 @lly 422 @urx 127
+@ury 4220 @rwi @setspecial
+%%BeginDocument: pic/node_add.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_add.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 422 127
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-33.0 171.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 4387 m -1000 -1000 l 10012 -1000 l 10012 4387 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 6141 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6141 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5426 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6856 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7571 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8524 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8047 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1866 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1866 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1151 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2581 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3296 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 4249 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3772 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+5507 1945 m 5402 2017 l 5460 1904 l 5369 2008 l 5415 2049 l cp
+clip
+n 5910 1440 m 5402 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 5507 1945 m 5402 2017 l 5460 1904 l 5484 1924 l 5507 1945 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6134 1902 m 6101 2025 l 6072 1901 l 6070 2039 l 6132 2041 l cp
+clip
+n 6109 1590 m 6101 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6134 1902 m 6101 2025 l 6072 1901 l 6103 1901 l 6134 1902 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6649 1952 m 6697 2070 l 6599 1989 l 6681 2100 l 6731 2064 l cp
+clip
+n 6307 1537 m 6697 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6649 1952 m 6697 2070 l 6599 1989 l 6624 1970 l 6649 1952 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+7696 2606 m 7602 2692 l 7645 2572 l 7568 2687 l 7619 2722 l cp
+clip
+n 7832 2347 m 7602 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 7696 2606 m 7602 2692 l 7645 2572 l 7671 2589 l 7696 2606 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8306 2632 m 8349 2752 l 8255 2666 l 8332 2782 l 8383 2747 l cp
+clip
+n 8150 2452 m 8349 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 8306 2632 m 8349 2752 l 8255 2666 l 8281 2649 l 8306 2632 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+5853 1564 m 5958 1492 l 5899 1605 l 5991 1501 l 5945 1460 l cp
+clip
+n 5490 2017 m 5958 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 5853 1564 m 5958 1492 l 5899 1605 l 5876 1584 l 5853 1564 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6140 1698 m 6173 1575 l 6201 1699 l 6204 1561 l 6142 1559 l cp
+clip
+n 6164 2010 m 6173 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6140 1698 m 6173 1575 l 6201 1699 l 6170 1699 l 6140 1698 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6404 1588 m 6355 1470 l 6454 1551 l 6371 1440 l 6321 1476 l cp
+clip
+n 6768 2025 m 6355 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 6404 1588 m 6355 1470 l 6454 1551 l 6429 1569 l 6404 1588 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7784 2499 m 7880 2415 l 7835 2534 l 7914 2420 l 7863 2385 l cp
+clip
+n 7673 2715 m 7880 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 7784 2499 m 7880 2415 l 7835 2534 l 7810 2517 l 7784 2499 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8263 2535 m 8222 2415 l 8315 2502 l 8240 2386 l 8188 2419 l cp
+clip
+n 8412 2707 m 8222 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 8263 2535 m 8222 2415 l 8315 2502 l 8289 2519 l 8263 2535 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1232 1945 m 1127 2017 l 1185 1904 l 1094 2008 l 1140 2049 l cp
+clip
+n 1635 1440 m 1127 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1232 1945 m 1127 2017 l 1185 1904 l 1209 1924 l 1232 1945 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+1859 1902 m 1826 2025 l 1797 1901 l 1795 2039 l 1857 2041 l cp
+clip
+n 1834 1590 m 1826 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1859 1902 m 1826 2025 l 1797 1901 l 1828 1902 l 1859 1902 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2374 1952 m 2422 2070 l 2324 1989 l 2406 2100 l 2456 2064 l cp
+clip
+n 2032 1537 m 2422 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2374 1952 m 2422 2070 l 2324 1989 l 2349 1970 l 2374 1952 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3421 2606 m 3327 2692 l 3370 2572 l 3293 2687 l 3344 2722 l cp
+clip
+n 3557 2347 m 3327 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3421 2606 m 3327 2692 l 3370 2572 l 3396 2589 l 3421 2606 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+4031 2632 m 4074 2752 l 3980 2666 l 4057 2782 l 4108 2747 l cp
+clip
+n 3875 2452 m 4074 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 4031 2632 m 4074 2752 l 3980 2666 l 4006 2649 l 4031 2632 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+1578 1564 m 1683 1492 l 1624 1605 l 1716 1501 l 1670 1460 l cp
+clip
+n 1215 2017 m 1683 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1578 1564 m 1683 1492 l 1624 1605 l 1601 1584 l 1578 1564 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+1865 1698 m 1898 1575 l 1926 1699 l 1929 1561 l 1867 1559 l cp
+clip
+n 1889 2010 m 1898 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 1865 1698 m 1898 1575 l 1926 1699 l 1895 1698 l 1865 1698 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2129 1588 m 2080 1470 l 2179 1551 l 2096 1440 l 2046 1476 l cp
+clip
+n 2493 2025 m 2080 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 2129 1588 m 2080 1470 l 2179 1551 l 2154 1569 l 2129 1588 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+3509 2499 m 3605 2415 l 3560 2534 l 3639 2420 l 3588 2385 l cp
+clip
+n 3398 2715 m 3605 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3509 2499 m 3605 2415 l 3560 2534 l 3535 2517 l 3509 2499 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+3988 2535 m 3947 2415 l 4040 2502 l 3965 2386 l 3913 2419 l cp
+clip
+n 4137 2707 m 3947 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr
+
+% arrowhead
+n 3988 2535 m 3947 2415 l 4040 2502 l 4014 2519 l 3988 2535 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+ [60] 0 sd
+n 6387 1372 m 8023 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
+% Polyline
+n 4950 900 m 9000 900 l 9000 3375 l 4950 3375 l cp gs col0 s gr
+% Polyline
+ [60] 0 sd
+n 2112 1372 m 3748 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd
+% Polyline
+n 675 900 m 4725 900 l 4725 3375 l 675 3375 l cp gs col0 s gr
+% Polyline
+gs clippath
+8119 1904 m 8055 2010 l 8061 1886 l 8022 2016 l 8079 2033 l cp
+clip
+n 8197 1545 m 8055 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8119 1904 m 8055 2010 l 8061 1886 l 8090 1895 l 8119 1904 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8214 1695 m 8280 1590 l 8271 1713 l 8313 1585 l 8256 1566 l cp
+clip
+n 8137 2025 m 8280 1590 l gs col0 s gr gr
+
+% arrowhead
+n 8214 1695 m 8280 1590 l 8271 1713 l 8243 1704 l 8214 1695 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+30.000 slw
+gs clippath
+7687 2205 m 7502 2333 l 7594 2129 l 7410 2351 l 7503 2428 l cp
+clip
+n 7875 1500 m 7620 1965 l 7845 1920 l 7485 2355 l gs col0 s gr gr
+
+% arrowhead
+15.000 slw
+n 7687 2205 m 7502 2333 l 7594 2129 l 7618 2195 l 7687 2205 l cp gs 0.00 setgray ef gr col0 s
+/Courier-Bold ff 195.00 scf sf
+6094 1379 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+7991 2265 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+1819 1379 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 195.00 scf sf
+3716 2265 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier ff 180.00 scf sf
+6459 1335 m
+gs 1 -1 sc (x # add_node y) col0 sh gr
+/Courier ff 180.00 scf sf
+2214 1365 m
+gs 1 -1 sc (x # add_node y) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 4165 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 4165 a 357 x Fv(The)g("delete")g(operation)
+e(simply)i(remo)o(v)o(es)f(the)h(links)g(between)f(tw)o(o)i(nodes.)e
+(In)h(the)g(picture)f(\()p Fr(A)i(deleted)e(node)396
+4629 y(becomes)h(the)g(r)l(oot)g(of)h(the)f(subtr)m(ee)p
+Fv(\))g(the)g(node)f Fq(x)i Fv(is)g(deleted)e(from)h(the)g(list)h(of)f
+(subnodes)f(of)h Fq(y)p Fv(.)g(After)g(that,)g Fq(x)396
+4737 y Fv(becomes)g(the)g(root)f(of)h(the)g(subtree)g(starting)g(at)g
+(this)h(node.)p Black 3800 5278 a Fr(51)p Black eop
+%%Page: 52 52
+52 51 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-4.)f(A)i
+(deleted)f(node)g(becomes)h(the)f(r)o(oot)f(of)h(the)g(subtr)o(ee)396
+1912 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1912 a @beginspecial 0 @llx 0 @lly 388 @urx
+138 @ury 3880 @rwi @setspecial
+%%BeginDocument: pic/node_delete.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_delete.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 388 138
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-78.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2700 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp
+clip
+n 2535 2857 m 2235 3360 l gs col0 s gr gr
+
+% arrowhead
+n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp
+clip
+n 2782 2932 m 3000 3420 l gs col0 s gr gr
+
+% arrowhead
+n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 2882 l cp
+clip
+n 2317 3367 m 2587 2910 l gs col0 s gr gr
+
+% arrowhead
+n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp
+clip
+n 3060 3375 m 2842 2887 l gs col0 s gr gr
+
+% arrowhead
+n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Ellipse
+n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+gs clippath
+2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp
+clip
+n 2737 2460 m 2737 2032 l gs col0 s gr gr
+
+% arrowhead
+n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp
+clip
+n 2662 2032 m 2662 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+1 slj
+60.000 slw
+n 4050 2610 m 4725 2610 l gs col0 s gr
+% Polyline
+n 4050 2745 m 4725 2745 l gs col0 s gr
+% Polyline
+1 slc
+n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+gs clippath
+2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp
+clip
+n 2490 1905 m 2025 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp
+clip
+n 2827 2002 m 3202 2542 l gs col0 s gr gr
+
+% arrowhead
+n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp
+clip
+n 2115 2475 m 2535 1965 l gs col0 s gr gr
+
+% arrowhead
+n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp
+clip
+n 3255 2505 m 2872 1957 l gs col0 s gr gr
+
+% arrowhead
+n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp
+clip
+n 6135 1905 m 5670 2467 l gs col0 s gr gr
+
+% arrowhead
+n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp
+clip
+n 6472 2002 m 6847 2542 l gs col0 s gr gr
+
+% arrowhead
+n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6081 2039 m 6180 1965 l 6127 2077 l 6213 1972 l 6166 1934 l cp
+clip
+n 5760 2475 m 6180 1965 l gs col0 s gr gr
+
+% arrowhead
+n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp
+clip
+n 6900 2505 m 6517 1957 l gs col0 s gr gr
+
+% arrowhead
+n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp
+clip
+n 8160 1957 m 7860 2460 l gs col0 s gr gr
+
+% arrowhead
+n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp
+clip
+n 8407 2032 m 8625 2520 l gs col0 s gr gr
+
+% arrowhead
+n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp
+clip
+n 7942 2467 m 8212 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp
+clip
+n 8685 2475 m 8467 1987 l gs col0 s gr gr
+
+% arrowhead
+n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s
+/Courier ff 180.00 scf sf
+3960 2250 m
+gs 1 -1 sc (x # delete) col0 sh gr
+% Polyline
+1 slj
+1 slc
+45.000 slw
+n 2595 2362 m 2820 2137 l gs col0 s gr
+% Polyline
+n 2595 2137 m 2820 2362 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr
+/Courier-Bold ff 180.00 scf sf
+2640 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+8280 1845 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+2655 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6300 1845 m
+gs 1 -1 sc (y) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1912 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1912 a 357 x Fv(It)g(is)h(also)e(possible)h
+(to)f(mak)o(e)h(a)g(clone)e(of)i(a)g(subtree;)f(illustrated)g(in)h
+Fr(The)f(clone)g(of)h(a)f(subtr)m(ee)p Fv(.)h(In)f(this)h(case,)g(the)f
+(clone)396 2377 y(is)i(a)g(cop)o(y)e(of)h(the)g(original)f(subtree)h(e)
+o(xcept)f(that)h(it)h(is)h(no)d(longer)g(a)i(subnode.)d(Because)i
+(cloning)f(ne)n(v)o(er)g(k)o(eeps)h(the)396 2485 y(connection)e(to)j
+(the)f(parent,)f(the)h(clones)g(are)g(called)g Fr(orphaned)r
+Fv(.)396 2717 y Fu(Figur)o(e)g(3-5.)f(The)i(clone)f(of)g(a)g(subtr)o
+(ee)396 4050 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 4050 a @beginspecial 0 @llx 0 @lly
+388 @urx 138 @ury 3880 @rwi @setspecial
+%%BeginDocument: pic/node_clone.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/node_clone.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 388 138
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-78.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6345 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 5895 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 6795 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2700 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+1 slj
+60.000 slw
+n 4050 2610 m 4725 2610 l gs col0 s gr
+% Polyline
+n 4050 2745 m 4725 2745 l gs col0 s gr
+% Polyline
+1 slc
+n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr
+% Polyline
+0 slj
+0 slc
+7.500 slw
+gs clippath
+2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp
+clip
+n 2490 1905 m 2025 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp
+clip
+n 2827 2002 m 3202 2542 l gs col0 s gr gr
+
+% arrowhead
+n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp
+clip
+n 2115 2475 m 2535 1965 l gs col0 s gr gr
+
+% arrowhead
+n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp
+clip
+n 3255 2505 m 2872 1957 l gs col0 s gr gr
+
+% arrowhead
+n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp
+clip
+n 6135 1905 m 5670 2467 l gs col0 s gr gr
+
+% arrowhead
+n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp
+clip
+n 6472 2002 m 6847 2542 l gs col0 s gr gr
+
+% arrowhead
+n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6081 2039 m 6180 1965 l 6127 2077 l 6213 1972 l 6166 1934 l cp
+clip
+n 5760 2475 m 6180 1965 l gs col0 s gr gr
+
+% arrowhead
+n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp
+clip
+n 6900 2505 m 6517 1957 l gs col0 s gr gr
+
+% arrowhead
+n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp
+clip
+n 8160 1957 m 7860 2460 l gs col0 s gr gr
+
+% arrowhead
+n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp
+clip
+n 8407 2032 m 8625 2520 l gs col0 s gr gr
+
+% arrowhead
+n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp
+clip
+n 7942 2467 m 8212 2010 l gs col0 s gr gr
+
+% arrowhead
+n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp
+clip
+n 8685 2475 m 8467 1987 l gs col0 s gr gr
+
+% arrowhead
+n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6352 2152 m 6382 2032 l 6412 2152 l 6412 2017 l 6352 2017 l cp
+clip
+n 6382 2460 m 6382 2032 l gs col0 s gr gr
+
+% arrowhead
+n 6352 2152 m 6382 2032 l 6412 2152 l 6382 2152 l 6352 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6337 2347 m 6307 2467 l 6277 2347 l 6277 2482 l 6337 2482 l cp
+clip
+n 6307 2032 m 6307 2467 l gs col0 s gr gr
+
+% arrowhead
+n 6337 2347 m 6307 2467 l 6277 2347 l 6307 2347 l 6337 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+5967 3272 m 5880 3360 l 5916 3242 l 5847 3358 l 5898 3388 l cp
+clip
+n 6180 2857 m 5880 3360 l gs col0 s gr gr
+
+% arrowhead
+n 5967 3272 m 5880 3360 l 5916 3242 l 5941 3257 l 5967 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6623 3298 m 6645 3420 l 6569 3323 l 6624 3446 l 6679 3421 l cp
+clip
+n 6427 2932 m 6645 3420 l gs col0 s gr gr
+
+% arrowhead
+n 6623 3298 m 6645 3420 l 6569 3323 l 6596 3310 l 6623 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+6145 2998 m 6232 2910 l 6197 3029 l 6265 2912 l 6214 2882 l cp
+clip
+n 5962 3367 m 6232 2910 l gs col0 s gr gr
+
+% arrowhead
+n 6145 2998 m 6232 2910 l 6197 3029 l 6171 3013 l 6145 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+6509 3009 m 6487 2887 l 6563 2984 l 6508 2861 l 6453 2886 l cp
+clip
+n 6705 3375 m 6487 2887 l gs col0 s gr gr
+
+% arrowhead
+n 6509 3009 m 6487 2887 l 6563 2984 l 6536 2997 l 6509 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp
+clip
+n 2737 2460 m 2737 2032 l gs col0 s gr gr
+
+% arrowhead
+n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp
+clip
+n 2662 2032 m 2662 2467 l gs col0 s gr gr
+
+% arrowhead
+n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp
+clip
+n 2535 2857 m 2235 3360 l gs col0 s gr gr
+
+% arrowhead
+n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp
+clip
+n 2782 2932 m 3000 3420 l gs col0 s gr gr
+
+% arrowhead
+n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s
+% Polyline
+gs clippath
+2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 2882 l cp
+clip
+n 2317 3367 m 2587 2910 l gs col0 s gr gr
+
+% arrowhead
+n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+gs clippath
+2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp
+clip
+n 3060 3375 m 2842 2887 l gs col0 s gr gr
+
+% arrowhead
+n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s
+% Polyline
+n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr
+/Courier-Bold ff 180.00 scf sf
+2655 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6300 1845 m
+gs 1 -1 sc (y) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6285 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+2640 2752 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier ff 180.00 scf sf
+3690 2025 m
+gs 1 -1 sc (let x' =) col0 sh gr
+/Courier ff 180.00 scf sf
+3690 2205 m
+gs 1 -1 sc (x # orphaned_clone) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+8235 1845 m
+gs 1 -1 sc (x') col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 4050 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 4050 a -2 4627 a Fp(3.2.2.)35
+b(The)f(methods)g(of)f(the)h(c)n(lass)h(type)f Fc(node)p
+Black 3800 5278 a Fr(52)p Black eop
+%%Page: 53 53
+53 52 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(General)g(obser)o(v)o
+(ers)g(.)p Black 396 866 a Ft(\225)p Black 60 w Fq(extension)p
+Fv(:)g(The)f(reference)g(to)h(the)h(e)o(xtension)d(object)i(which)g
+(belongs)f(to)h(this)h(node)e(\(see)h(...\).)p Black
+396 974 a Ft(\225)p Black 60 w Fq(dtd)p Fv(:)h(Returns)f(a)g(reference)
+f(to)h(the)g(global)g(DTD.)g(All)h(nodes)e(of)h(a)h(tree)f(must)g
+(share)g(the)g(same)h(DTD.)p Black 396 1082 a Ft(\225)p
+Black 60 w Fq(parent)p Fv(:)f(Get)h(the)f(f)o(ather)f(node.)g(Raises)j
+Fq(Not_found)d Fv(in)i(the)f(case)g(the)h(node)e(does)h(not)f(ha)n(v)o
+(e)h(a)h(parent,)e(i.e.)h(the)479 1190 y(node)f(is)j(the)e(root.)p
+Black 396 1298 a Ft(\225)p Black 60 w Fq(root)p Fv(:)g(Gets)h(the)g
+(reference)d(to)i(the)h(root)e(node)g(of)h(the)g(tree.)g(Ev)o(ery)f
+(node)g(is)i(contained)e(in)h(a)h(tree)f(with)h(a)f(root,)f(so)479
+1406 y(this)h(method)f(al)o(w)o(ays)h(succeeds.)e(Note)i(that)g(this)g
+(method)e Fr(sear)m(c)o(hes)h Fv(the)h(root,)e(which)h(costs)h(time)g
+(proportional)d(to)479 1514 y(the)j(length)g(of)g(the)g(path)g(to)g
+(the)g(root.)p Black 396 1622 a Ft(\225)p Black 60 w
+Fq(sub_nodes)p Fv(:)g(Returns)g(references)e(to)j(the)f(children.)f
+(The)g(returned)g(list)i(re\003ects)g(the)f(order)f(of)h(the)g
+(children.)e(F)o(or)479 1730 y(data)i(nodes,)g(this)g(method)f(returns)
+g(the)i(empty)e(list.)p Black 396 1838 a Ft(\225)p Black
+60 w Fq(iter_nodes)43 b(f)p Fv(:)21 b(Iterates)f(o)o(v)o(er)f(the)h
+(children,)f(and)g(calls)i Fq(f)g Fv(for)e(e)n(v)o(ery)g(child)h(in)g
+(turn.)p Black 396 1945 a Ft(\225)p Black 60 w Fq(iter_nodes_sibl)43
+b(f)p Fv(:)20 b(Iterates)g(o)o(v)o(er)f(the)h(children,)f(and)h(calls)g
+Fq(f)h Fv(for)f(e)n(v)o(ery)e(child)i(in)h(turn.)e Fq(f)h
+Fv(gets)h(as)479 2053 y(ar)o(guments)d(the)j(pre)n(vious)d(node,)h(the)
+h(current)f(node,)g(and)h(the)g(ne)o(xt)f(node.)p Black
+396 2161 a Ft(\225)p Black 60 w Fq(node_type)p Fv(:)h(Returns)g(either)
+f Fq(T_data)h Fv(which)g(means)g(that)g(the)g(node)f(is)i(a)g(data)f
+(node,)f(or)h Fq(T_element)43 b(n)479 2269 y Fv(which)20
+b(means)g(that)g(the)g(node)f(is)j(an)e(element)f(of)h(type)g
+Fq(n)p Fv(.)g(If)g(con\002gured,)e(possible)i(node)f(types)h(are)g
+(also)479 2377 y Fq(T_pinstr)44 b(t)20 b Fv(indicating)f(that)h(the)h
+(node)e(represents)g(a)i(processing)e(instruction)g(with)h(tar)o(get)f
+Fq(t)p Fv(,)i(and)479 2485 y Fq(T_comment)f Fv(in)g(which)g(case)g(the)
+g(node)g(is)h(a)f(comment.)p Black 396 2593 a Ft(\225)p
+Black 60 w Fq(encoding)p Fv(:)g(Returns)g(the)g(encoding)e(of)i(the)g
+(strings.)p Black 396 2701 a Ft(\225)p Black 60 w Fq(data)p
+Fv(:)g(Returns)g(the)h(character)e(data)h(of)g(this)g(node)f(and)h(all)
+h(children,)d(concatenated)h(as)i(one)e(string.)h(The)479
+2809 y(encoding)e(of)i(the)h(string)e(is)j(what)e(the)g(method)f
+Fq(encoding)g Fv(returns.)g(-)i(F)o(or)e(data)h(nodes,)g(this)g(method)
+f(simply)479 2917 y(returns)h(the)g(represented)e(characters.)h(F)o(or)
+h(elements,)g(the)g(meaning)f(of)g(the)i(method)d(has)j(been)e(e)o
+(xtended)g(such)479 3025 y(that)i(it)f(returns)g(something)e(useful,)i
+(i.e.)g(the)g(ef)n(fecti)n(v)o(ely)f(contained)f(characters,)h(without)
+h(markup.)e(\(F)o(or)479 3133 y Fq(T_pinstr)i Fv(and)f
+Fq(T_comment)h Fv(nodes,)f(the)h(method)f(returns)g(the)h(empty)g
+(string.\))p Black 396 3241 a Ft(\225)p Black 60 w Fq(position)p
+Fv(:)g(If)g(con\002gured,)d(this)k(method)e(returns)g(the)h(position)g
+(of)g(the)g(element)g(as)g(triple)g(\(entity)-5 b(,)19
+b(line,)479 3349 y(byteposition\).)f(F)o(or)i(data)g(nodes,)f(the)h
+(position)g(is)h(not)f(stored.)f(If)h(the)g(position)g(is)h(not)f(a)n
+(v)n(ailable)f(the)i(triple)f Fq("?",)479 3456 y(0,)45
+b(0)20 b Fv(is)h(returned.)p Black 396 3564 a Ft(\225)p
+Black 60 w Fq(comment)p Fv(:)f(Returns)g Fq(Some)44 b(text)20
+b Fv(for)f(comment)g(nodes,)g(and)g Fq(None)h Fv(for)g(other)f(nodes.)g
+(The)h Fq(text)f Fv(is)i(e)n(v)o(erything)479 3672 y(between)f(the)g
+(comment)f(delimiters)g Fo(<)p Fq(-)i Fv(and)e Fq(-)p
+Fo(>)p Fv(.)p Black 396 3780 a Ft(\225)p Black 60 w Fq(pinstr)44
+b(n)p Fv(:)21 b(Returns)f(all)h(processing)d(instructions)i(that)g(are)
+g(directly)f(contained)g(in)h(this)h(element)e(and)h(that)g(ha)n(v)o(e)
+479 3888 y(a)h Fr(tar)m(g)o(et)h Fv(speci\002cation)d(of)h
+Fq(n)p Fv(.)g(The)g(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(after)h
+(the)g Fo(<)p Fq(?)p Fv(.)p Black 396 3996 a Ft(\225)p
+Black 60 w Fq(pinstr_names)p Fv(:)f(Returns)h(the)g(list)i(of)e(all)g
+(tar)o(gets)g(of)g(processing)f(instructions)g(directly)g(contained)g
+(in)h(this)479 4104 y(element.)p Black 396 4212 a Ft(\225)p
+Black 60 w Fq(write)44 b(s)h(enc)p Fv(:)20 b(Prints)h(the)f(node)f(and)
+h(all)h(subnodes)d(to)j(the)f(passed)g(output)f(stream)h(as)h(v)n(alid)
+f(XML)g(te)o(xt,)g(using)479 4320 y(the)g(passed)h(e)o(xternal)e
+(encoding.)396 4511 y Fu(Attrib)n(ute)h(obser)o(v)o(ers)h(.)p
+Black 396 4743 a Ft(\225)p Black 60 w Fq(attribute)44
+b(n)p Fv(:)20 b(Returns)g(the)h(v)n(alue)e(of)h(the)g(attrib)n(ute)g
+(with)g(name)g Fq(n)p Fv(.)g(This)h(method)d(returns)i(a)g(v)n(alue)g
+(for)f(e)n(v)o(ery)479 4851 y(declared)g(attrib)n(ute,)h(and)f(it)i
+(raises)g Fq(Not_found)e Fv(for)h(an)o(y)f(undeclared)f(attrib)n(ute.)i
+(Note)g(that)g(it)h(e)n(v)o(en)e(returns)h(a)p Black
+3800 5278 a Fr(53)p Black eop
+%%Page: 54 54
+54 53 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 479 579 a Fv(v)n(alue)g(if)g(the)g
+(attrib)n(ute)g(is)h(actually)f(missing)g(b)n(ut)g(is)h(declared)e(as)i
+Fq(#IMPLIED)f Fv(or)g(has)g(a)h(def)o(ault)e(v)n(alue.)g(-)i(Possible)
+479 687 y(v)n(alues)f(are:)p Black 479 919 a Fa(\225)p
+Black 62 w Fq(Implied_value)p Fv(:)f(The)h(attrib)n(ute)g(has)g(been)g
+(declared)e(with)j(the)f(k)o(e)o(yw)o(ord)e Fq(#IMPLIED)p
+Fv(,)i(and)f(the)h(attrib)n(ute)g(is)562 1027 y(missing)g(in)h(the)f
+(attrib)n(ute)g(list)h(of)f(this)h(element.)p Black 479
+1135 a Fa(\225)p Black 62 w Fq(Value)44 b(s)p Fv(:)21
+b(The)f(attrib)n(ute)g(has)g(been)g(declared)e(as)j(type)f
+Fq(CDATA)p Fv(,)g(as)h Fq(ID)p Fv(,)f(as)h Fq(IDREF)p
+Fv(,)e(as)i Fq(ENTITY)p Fv(,)f(or)g(as)562 1243 y Fq(NMTOKEN)p
+Fv(,)g(or)g(as)g(enumeration)e(or)i(notation,)f(and)g(one)h(of)g(the)g
+(tw)o(o)h(conditions)d(holds:)i(\(1\))g(The)g(attrib)n(ute)562
+1351 y(v)n(alue)g(is)h(present)e(in)i(the)f(attrib)n(ute)g(list)h(in)f
+(which)g(case)h(the)f(v)n(alue)f(is)j(returned)c(in)i(the)h(string)e
+Fq(s)p Fv(.)i(\(2\))e(The)562 1459 y(attrib)n(ute)h(has)h(been)e
+(omitted,)g(and)h(the)g(DTD)g(declared)f(the)i(attrib)n(ute)e(with)i(a)
+f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)562 1567
+y(v)n(alue)f(is)i(returned)d(in)i Fq(s)p Fv(.)g(-)g(Summarized,)d
+Fq(Value)44 b(s)20 b Fv(is)h(returned)d(for)h(non-implied,)e(non-list)i
+(attrib)n(ute)g(v)n(alues.)p Black 479 1675 a Fa(\225)p
+Black 62 w Fq(Valuelist)44 b(l)p Fv(:)20 b(The)g(attrib)n(ute)g(has)g
+(been)g(declared)f(as)i(type)e Fq(IDREFS)p Fv(,)h(as)h
+Fq(ENTITIES)p Fv(,)e(or)h(as)h Fq(NMTOKENS)p Fv(,)562
+1783 y(and)f(one)g(of)f(the)i(tw)o(o)f(conditions)f(holds:)h(\(1\))f
+(The)h(attrib)n(ute)g(v)n(alue)f(is)i(present)f(in)g(the)h(attrib)n
+(ute)e(list)j(in)e(which)562 1891 y(case)h(the)f(space-separated)e(tok)
+o(ens)i(of)g(the)g(v)n(alue)g(are)g(returned)e(in)j(the)f(string)g
+(list)h Fq(l)p Fv(.)f(\(2\))g(The)g(attrib)n(ute)g(has)562
+1999 y(been)g(omitted,)f(and)h(the)g(DTD)g(declared)f(the)h(attrib)n
+(ute)g(with)h(a)f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)f(v)n(alue)
+h(is)h(returned)562 2107 y(in)g Fq(l)p Fv(.)f(-)g(Summarized,)f
+Fq(Valuelist)43 b(l)20 b Fv(is)i(returned)c(for)i(all)g(list-type)g
+(attrib)n(ute)g(v)n(alues.)396 2256 y(Note)g(that)h(before)d(the)j
+(attrib)n(ute)f(v)n(alue)f(is)i(returned,)d(the)i(v)n(alue)g(is)h
+(normalized.)d(This)j(means)e(that)i(ne)n(wlines)e(are)479
+2364 y(con)m(v)o(erted)f(to)i(spaces,)g(and)g(that)g(references)f(to)h
+(character)f(entities)i(\(i.e.)f Fq(&#)p Fn(n)p Fq(;)p
+Fv(\))g(and)f(general)g(entities)i(\(i.e.)479 2472 y
+Fq(&)p Fn(name)p Fq(;)p Fv(\))f(are)g(e)o(xpanded;)e(if)i(necessary)-5
+b(,)19 b(e)o(xpansion)f(is)j(performed)d(recursi)n(v)o(ely)-5
+b(.)479 2621 y(In)20 b(well-formedness)e(mode,)h(there)h(is)h(no)f(DTD)
+g(which)g(could)f(declare)h(an)g(attrib)n(ute.)f(Because)i(of)f(this,)g
+(e)n(v)o(ery)479 2729 y(occuring)f(attrib)n(ute)g(is)i(considered)e(as)
+i(a)f(CD)m(A)-9 b(T)h(A)21 b(attrib)n(ute.)p Black 396
+2879 a Ft(\225)p Black 60 w Fq(required_string_attribute)41
+b(n)p Fv(:)21 b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)
+g(n,)g(or)g(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+2987 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3094 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g
+(method)f(will)i(f)o(ail.)g(-)f(This)g(method)f(is)i(con)m(v)o(enient)d
+(if)i(you)g(e)o(xpect)f(a)h(non-implied)479 3202 y(and)g(non-list)f
+(attrib)n(ute)h(v)n(alue.)p Black 396 3310 a Ft(\225)p
+Black 60 w Fq(optional_string_attribute)41 b(n)p Fv(:)21
+b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)g(n,)g(or)g
+(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479
+3418 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h
+(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h
+(if)h(the)479 3526 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g
+(method)f(returns)h(None.)f(-)h(This)h(method)e(is)i(con)m(v)o(enient)c
+(if)k(you)e(e)o(xpect)g(a)i(non-list)479 3634 y(attrib)n(ute)f(v)n
+(alue)g(including)e(the)i(implied)g(v)n(alue.)p Black
+396 3742 a Ft(\225)p Black 60 w Fq(required_list_attribute)41
+b(n)p Fv(:)20 b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f
+(called)g(n,)g(or)g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g
+(list)479 3850 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n
+(ute)g(v)n(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g
+(not)g(e)o(xists,)g(the)g(method)479 3958 y(will)h(f)o(ail.)g(-)f(This)
+g(method)f(is)i(con)m(v)o(enient)d(if)i(you)g(e)o(xpect)f(a)h(list)i
+(attrib)n(ute)d(v)n(alue.)p Black 396 4066 a Ft(\225)p
+Black 60 w Fq(optional_list_attribute)41 b(n)p Fv(:)20
+b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f(called)g(n,)g(or)
+g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g(list)479
+4174 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n(ute)g(v)n
+(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g(not)g(e)o
+(xists,)g(an)g(empty)g(list)479 4282 y(will)h(be)f(returned.)e(-)j
+(This)f(method)f(is)i(con)m(v)o(enient)d(if)i(you)f(e)o(xpect)h(a)g
+(list)i(attrib)n(ute)d(v)n(alue)h(or)g(the)g(implied)f(v)n(alue.)p
+Black 396 4390 a Ft(\225)p Black 60 w Fq(attribute_names)p
+Fv(:)g(returns)g(the)h(list)h(of)f(all)h(attrib)n(ute)f(names)g(of)g
+(this)g(element.)g(As)h(this)f(is)i(a)e(v)n(alidating)479
+4498 y(parser)m(,)f(this)i(list)g(is)g(equal)f(to)g(the)h(list)g(of)f
+(declared)f(attrib)n(utes.)p Black 396 4605 a Ft(\225)p
+Black 60 w Fq(attribute_type)43 b(n)p Fv(:)20 b(returns)g(the)g(type)g
+(of)g(the)g(attrib)n(ute)g(called)g Fq(n)p Fv(.)g(See)h(the)f(module)f
+Fq(Pxp_types)g Fv(for)g(a)479 4713 y(description)g(of)h(the)g(encoding)
+e(of)i(the)g(types.)p Black 396 4821 a Ft(\225)p Black
+60 w Fq(attributes)p Fv(:)f(returns)h(the)g(list)h(of)f(pairs)g(of)g
+(names)g(and)g(v)n(alues)g(for)f(all)i(attrib)n(utes)f(of)g(this)h
+(element.)p Black 3800 5278 a Fr(54)p Black eop
+%%Page: 55 55
+55 54 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(id_attribute_name)p Fv(:)e(returns)h(the)i(name)e(of)h
+(the)g(attrib)n(ute)g(that)g(is)h(declared)e(with)h(type)g(ID.)g(There)
+f(is)i(at)g(most)479 687 y(one)f(such)g(attrib)n(ute.)f(The)h(method)f
+(raises)i Fq(Not_found)e Fv(if)i(there)e(is)i(no)f(declared)f(ID)i
+(attrib)n(ute)e(for)h(the)g(element)479 795 y(type.)p
+Black 396 903 a Ft(\225)p Black 60 w Fq(id_attribute_value)p
+Fv(:)e(returns)h(the)i(v)n(alue)e(of)h(the)g(attrib)n(ute)g(that)g(is)h
+(declared)e(with)i(type)e(ID.)i(There)e(is)i(at)479 1011
+y(most)g(one)e(such)h(attrib)n(ute.)g(The)g(method)e(raises)j
+Fq(Not_found)e Fv(if)i(there)f(is)h(no)e(declared)g(ID)i(attrib)n(ute)f
+(for)f(the)479 1119 y(element)h(type.)p Black 396 1226
+a Ft(\225)p Black 60 w Fq(idref_attribute_names)p Fv(:)d(returns)h(the)
+h(list)i(of)e(attrib)n(ute)f(names)h(that)h(are)f(declared)f(as)i
+(IDREF)f(or)g(IDREFS.)396 1417 y Fu(Modifying)h(methods)h(.)f
+Fv(The)g(follo)n(wing)f(methods)g(are)h(only)f(de\002ned)g(for)h
+(element)f(nodes)h(\(more)f(e)o(xactly:)g(the)396 1525
+y(methods)g(are)i(de\002ned)e(for)g(data)h(nodes,)f(too,)h(b)n(ut)g(f)o
+(ail)h(al)o(w)o(ays\).)p Black 396 1758 a Ft(\225)p Black
+60 w Fq(add_node)44 b(sn)p Fv(:)20 b(Adds)g(sub)g(node)g
+Fq(sn)g Fv(to)g(the)g(list)i(of)e(children.)e(This)j(operation)d(is)j
+(illustrated)f(in)g(the)g(picture)g Fr(A)479 1866 y(node)f(can)h(only)g
+(be)g(added)f(if)h(it)h(is)h(a)e(r)l(oot)q Fv(.)g(This)h(method)e(e)o
+(xpects)g(that)h Fq(sn)h Fv(is)g(a)g(root,)e(and)g(it)i(requires)f
+(that)g Fq(sn)g Fv(and)479 1974 y(the)g(current)f(object)h(share)g(the)
+g(same)h(DTD.)479 2123 y(Because)g Fq(add_node)e Fv(is)i(the)f(method)f
+(the)h(parser)g(itself)h(uses)g(to)f(add)g(ne)n(w)g(nodes)f(to)h(the)h
+(tree,)e(it)i(performs)e(by)479 2231 y(def)o(ault)h(some)g(simple)g(v)n
+(alidation)f(checks:)g(If)h(the)h(content)e(model)g(is)i(a)g(re)o
+(gular)e(e)o(xpression,)f(it)j(is)g(not)f(allo)n(wed)f(to)479
+2339 y(add)h(data)g(nodes)f(to)i(this)g(node)e(unless)h(the)g(ne)n(w)g
+(nodes)g(consist)g(only)f(of)h(whitespace.)g(In)g(this)g(case,)h(the)f
+(ne)n(w)g(data)479 2447 y(nodes)g(are)g(silently)g(dropped)e(\(you)h
+(can)h(change)f(this)h(by)g(in)m(v)n(oking)e Fq
+(keep_always_whitespace_mode)p Fv(\).)479 2596 y(If)i(the)h(document)d
+(is)j(\003agged)e(as)i(stand-alone,)d(these)j(data)f(nodes)f(only)g
+(containing)g(whitespace)g(are)h(e)n(v)o(en)479 2704
+y(forbidden)e(if)i(the)h(element)e(declaration)g(is)i(contained)d(in)j
+(an)f(e)o(xternal)f(entity)-5 b(.)19 b(This)h(case)h(is)g(detected)f
+(and)479 2812 y(rejected.)479 2962 y(If)g(the)h(content)e(model)g(is)i
+Fq(EMPTY)p Fv(,)f(it)h(is)g(not)f(allo)n(wed)f(to)i(add)e(an)o(y)h
+(data)g(node)f(unless)h(the)g(data)g(node)g(is)h(empty)-5
+b(.)18 b(In)479 3070 y(this)j(case,)f(the)h(ne)n(w)f(data)g(node)f(is)i
+(silently)f(dropped.)479 3219 y(These)g(checks)g(only)f(apply)h(if)g
+(there)g(is)h(a)f(DTD.)h(In)f(well-formedness)e(mode,)h(it)i(is)g
+(assumed)e(that)i(e)n(v)o(ery)d(element)479 3327 y(is)j(declared)e
+(with)i(content)e(model)g Fq(ANY)h Fv(which)g(prohibits)f(an)o(y)g(v)n
+(alidation)g(check.)g(Furthermore,)f(you)h(turn)h(these)479
+3435 y(checks)g(of)n(f)f(by)h(passing)g Fq(~force:true)f
+Fv(as)i(\002rst)g(ar)o(gument.)p Black 396 3584 a Ft(\225)p
+Black 60 w Fq(add_pinstr)43 b(pi)p Fv(:)21 b(Adds)f(the)g(processing)f
+(instruction)g Fq(pi)h Fv(to)h(the)f(list)h(of)f(processing)f
+(instructions.)p Black 396 3692 a Ft(\225)p Black 60
+w Fq(delete)p Fv(:)h(Deletes)h(this)g(node)e(from)g(the)h(tree.)g
+(After)g(this)h(operation,)d(this)i(node)g(is)h(no)f(longer)e(the)j
+(child)e(of)h(the)479 3800 y(former)f(f)o(ather)g(node;)f(and)i(the)g
+(node)e(loses)j(the)e(connection)f(to)i(the)g(f)o(ather)f(as)h(well.)h
+(This)e(operation)f(is)j(illustrated)479 3908 y(by)f(the)g(\002gure)g
+Fr(A)g(deleted)g(node)f(becomes)g(the)i(r)l(oot)f(of)g(the)h(subtr)m
+(ee)p Fv(.)p Black 396 4016 a Ft(\225)p Black 60 w Fq(set_nodes)44
+b(nl)p Fv(:)20 b(Sets)h(the)f(list)i(of)e(children)e(to)j
+Fq(nl)p Fv(.)f(It)g(is)i(required)c(that)i(e)n(v)o(ery)f(member)g(of)h
+Fq(nl)g Fv(is)h(a)g(root,)e(and)479 4124 y(that)i(all)f(members)f(and)h
+(the)g(current)f(object)h(share)g(the)g(same)g(DTD.)g(Unlik)o(e)g
+Fq(add_node)p Fv(,)g(no)f(v)n(alidation)g(checks)479
+4232 y(are)h(performed.)p Black 396 4340 a Ft(\225)p
+Black 60 w Fq(quick_set_attributes)42 b(atts)p Fv(:)20
+b(sets)h(the)f(attrib)n(utes)h(of)e(this)i(element)f(to)g
+Fq(atts)p Fv(.)g(It)g(is)i Fr(not)f Fv(check)o(ed)479
+4448 y(whether)e Fq(atts)i Fv(matches)e(the)i(DTD)f(or)g(not;)g(it)h
+(is)g(up)f(to)g(the)g(caller)g(of)g(this)h(method)e(to)h(ensure)g
+(this.)g(\(This)479 4556 y(method)f(may)h(be)g(useful)g(to)g(transform)
+e(the)j(attrib)n(ute)f(v)n(alues,)f(i.e.)h(apply)f(a)i(mapping)d(to)j
+(e)n(v)o(ery)e(attrib)n(ute.\))p Black 396 4664 a Ft(\225)p
+Black 60 w Fq(set_comment)43 b(text)p Fv(:)20 b(This)h(method)e(is)i
+(only)e(applicable)g(to)h Fq(T_comment)g Fv(nodes;)f(it)i(sets)g(the)g
+(comment)d(te)o(xt)479 4772 y(contained)h(by)h(such)g(nodes.)p
+Black 3800 5278 a Fr(55)p Black eop
+%%Page: 56 56
+56 55 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Cloning)g(methods)h(.)
+p Black 396 811 a Ft(\225)p Black 60 w Fq(orphaned_clone)p
+Fv(:)e(Returns)h(a)g(clone)g(of)g(the)g(node)f(and)h(the)g(complete)f
+(tree)h(belo)n(w)g(this)h(node)e(\(deep)g(clone\).)479
+919 y(The)h(clone)g(does)g(not)g(ha)n(v)o(e)f(a)i(parent)e(\(i.e.)h
+(the)g(reference)f(to)h(the)g(parent)f(node)g(is)j Fr(not)f
+Fv(cloned\).)d(While)j(cop)o(ying)479 1027 y(the)f(subtree,)g(strings)g
+(are)g(skipped;)f(it)i(is)g(lik)o(ely)f(that)h(the)f(original)f(tree)h
+(and)g(the)g(cop)o(y)f(tree)h(share)g(strings.)479 1135
+y(Extension)f(objects)h(are)g(cloned)f(by)h(in)m(v)n(oking)e(the)i
+Fq(clone)g Fv(method)f(on)h(the)g(original)f(objects;)h(ho)n(w)g(much)f
+(of)h(the)479 1243 y(e)o(xtension)f(objects)h(is)h(cloned)e(depends)g
+(on)h(the)g(implemention)e(of)i(this)h(method.)479 1393
+y(This)g(operation)d(is)j(illustrated)f(by)g(the)g(\002gure)f
+Fr(The)i(clone)e(of)i(a)f(subtr)m(ee)p Fv(.)p Black 396
+1542 a Ft(\225)p Black 60 w Fq(orphaned_flat_clone)p
+Fv(:)e(Returns)i(a)h(clone)e(of)h(the)g(node,)f(b)n(ut)h(sets)i(the)e
+(list)h(of)f(sub)g(nodes)g(to)g([],)g(i.e.)g(the)g(sub)479
+1650 y(nodes)g(are)g(not)g(cloned.)p Black 396 1758 a
+Ft(\225)p Black 81 w Fq(create_element)42 b(dtd)i(nt)h(al)p
+Fv(:)20 b(Returns)f(a)i(\003at)f(cop)o(y)f(of)g(this)i(node)d(\(which)h
+(must)h(be)f(an)h(element\))f(with)h(the)479 1866 y(follo)n(wing)f
+(modi\002cations:)g(The)h(DTD)g(is)h(set)g(to)f Fq(dtd)p
+Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f Fq(nt)p Fv(,)g(and)g(the)g
+(ne)n(w)g(attrib)n(ute)g(list)h(is)479 1974 y(set)g(to)f
+Fq(al)g Fv(\(gi)n(v)o(en)e(as)i(list)h(of)f(\(name,v)n(alue\))d
+(pairs\).)i(The)g(cop)o(y)g(does)h(not)f(ha)n(v)o(e)g(children)g(nor)g
+(a)h(parent.)f(It)h(does)f(not)479 2082 y(contain)g(processing)g
+(instructions.)g(See)i(the)f(e)o(xample)f(belo)n(w.)479
+2231 y(Note)h(that)h(you)e(can)h(specify)g(the)g(position)f(of)h(the)g
+(ne)n(w)g(node)f(by)h(the)g(optional)f(ar)o(gument)f
+Fq(~position)p Fv(.)p Black 396 2380 a Ft(\225)p Black
+81 w Fq(create_data)43 b(dtd)h(cdata)p Fv(:)20 b(Returns)g(a)h(\003at)g
+(cop)o(y)e(of)h(this)h(node)e(\(which)g(must)h(be)h(a)f(data)g(node\))f
+(with)h(the)479 2488 y(follo)n(wing)f(modi\002cations:)g(The)h(DTD)g
+(is)h(set)g(to)f Fq(dtd)p Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f
+Fq(T_data)p Fv(;)g(the)g(attrib)n(ute)g(list)h(is)479
+2596 y(empty)f(\(data)f(nodes)h(ne)n(v)o(er)f(ha)n(v)o(e)g(attrib)n
+(utes\);)h(the)g(list)h(of)f(children)f(and)h(PIs)h(is)g(empty)-5
+b(,)19 b(too)g(\(same)h(reason\).)f(The)479 2704 y(ne)n(w)h(node)f
+(does)h(not)g(ha)n(v)o(e)g(a)g(parent.)f(The)h(v)n(alue)g
+Fq(cdata)g Fv(is)h(the)f(ne)n(w)g(character)f(content)g(of)h(the)g
+(node.)f(See)i(the)479 2812 y(e)o(xample)e(belo)n(w.)p
+Black 396 2920 a Ft(\225)p Black 60 w Fq(keep_always_whitespace_mode)p
+Fv(:)e(Ev)o(en)i(data)h(nodes)f(which)h(are)g(normally)f(dropped)e
+(because)j(the)o(y)f(only)479 3028 y(contain)g(ignorable)f(whitespace,)
+h(can)h(added)e(to)i(this)h(node)d(once)h(this)i(mode)e(is)h(turned)f
+(on.)g(\(This)h(mode)f(is)h(useful)479 3136 y(to)h(produce)d(canonical)
+h(XML.\))396 3327 y Fu(V)-8 b(alidating)20 b(methods)h(.)f
+Fv(There)f(is)j(one)d(method)g(which)h(locally)f(v)n(alidates)h(the)g
+(node,)f(i.e.)i(checks)e(whether)g(the)396 3435 y(subnodes)g(match)h
+(the)g(content)f(model)g(of)h(this)h(node.)p Black 396
+3667 a Ft(\225)p Black 60 w Fq(local_validate)p Fv(:)e(Checks)h(that)g
+(this)h(node)e(conforms)f(to)j(the)f(DTD)g(by)g(comparing)e(the)i(type)
+g(of)g(the)479 3775 y(subnodes)e(with)i(the)g(content)e(model)h(for)g
+(this)h(node.)e(\(Applications)g(need)h(not)g(call)h(this)h(method)d
+(unless)h(the)o(y)g(add)479 3883 y(ne)n(w)h(nodes)g(themselv)o(es)f(to)
+i(the)f(tree.\))-2 4294 y Fp(3.2.3.)35 b(The)f(c)n(lass)h
+Fc(element_impl)396 4462 y Fv(This)21 b(class)g(is)g(an)f
+(implementation)e(of)i Fq(node)g Fv(which)g(realizes)g(element)g
+(nodes:)396 4642 y Fq(class)44 b([)h('ext)f(])h(element_impl)e(:)h
+('ext)g(->)h([)g('ext)f(])g(node)396 4875 y Fu(Constructor)-8
+b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)p
+Black 3798 5278 a Fr(56)p Black eop
+%%Page: 57 57
+57 56 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(new)45
+b(element_impl)d Fn(extension_object)396 770 y Fv(which)20
+b(creates)g(a)h(special)f(form)f(of)h(empty)f(element)h(which)g
+(already)f(contains)g(a)i(reference)d(to)j(the)396 878
+y Fl(extension_object)p Fv(,)d(b)n(ut)i(is)h(otherwise)f(empty)-5
+b(.)18 b(This)j(special)f(form)f(is)i(called)f(an)g Fr(e)n(xemplar)r
+Fv(.)g(The)g(purpose)f(of)396 986 y(e)o(x)o(emplars)g(is)i(that)f(the)o
+(y)g(serv)o(e)f(as)i(patterns)f(that)g(can)g(be)g(duplicated)f(and)g
+(\002lled)i(with)f(data.)g(The)g(method)396 1094 y Fq(create_element)f
+Fv(is)i(designed)e(to)h(perform)e(this)j(action.)396
+1243 y Fu(Example.)f Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
+1423 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 1520
+y(let)h(exemplar)222 b(=)45 b(new)f(element_impl)f(exemplar_ext)g(in)
+396 1711 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
+(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(element)g
+(nodes)f(are)i(created:)396 1891 y Fq(let)45 b(element)e(=)i(exemplar)e
+(#)i(create_element)e(dtd)h(\(T_element)f(name\))h(attlist)396
+2082 y Fv(The)20 b Fq(element)g Fv(is)h(a)f(cop)o(y)g(of)g
+Fq(exemplar)f Fv(\(e)n(v)o(en)g(the)h(e)o(xtension)f
+Fq(exemplar_ext)g Fv(has)h(been)g(copied\))e(which)396
+2190 y(ensures)h(that)h Fq(element)f Fv(and)g(its)i(e)o(xtension)d(are)
+i(objects)f(of)h(the)f(same)h(class)h(as)f(the)g(e)o(x)o(emplars;)e
+(note)h(that)h(you)e(need)396 2298 y(not)i(to)g(pass)h(a)g(class)g
+(name)f(or)f(other)h(meta)g(information.)d(The)j(cop)o(y)g(is)h
+(initially)f(connected)e(with)j(the)f Fq(dtd)p Fv(,)g(it)h(gets)f(a)396
+2406 y(node)f(type,)h(and)g(the)g(attrib)n(ute)g(list)h(is)g(\002lled.)
+f(The)g Fq(element)g Fv(is)h(no)n(w)e(fully)h(functional;)e(it)j(can)f
+(be)g(added)f(to)i(another)396 2514 y(element)f(as)h(child,)e(and)h(it)
+h(can)f(contain)f(references)g(to)h(subnodes.)-2 2884
+y Fp(3.2.4.)35 b(The)f(c)n(lass)h Fc(data_impl)396 3051
+y Fv(This)21 b(class)g(is)g(an)f(implementation)e(of)i
+Fq(node)g Fv(which)g(should)f(be)h(used)g(for)f(all)i(character)e(data)
+h(nodes:)396 3232 y Fq(class)44 b([)h('ext)f(])h(data_impl)e(:)i('ext)f
+(->)g([)h('ext)f(])h(node)396 3464 y Fu(Constructor)-8
+b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)
+396 3644 y Fq(new)45 b(data_impl)e Fn(extension_object)396
+3835 y Fv(which)20 b(creates)g(an)g(empty)g(e)o(x)o(emplar)e(node)h
+(which)h(is)h(connected)d(to)i Fl(extension_object)p
+Fv(.)e(The)i(node)f(does)396 3943 y(not)h(contain)f(a)i(reference)d(to)
+j(an)o(y)e(DTD,)h(and)g(because)f(of)h(this)h(it)g(cannot)e(be)h(added)
+f(to)i(node)e(trees.)396 4093 y(T)-7 b(o)21 b(get)f(a)g(fully)g(w)o
+(orking)f(data)h(node,)f(apply)g(the)h(method)f Fq(create_data)g
+Fv(to)h(the)g(e)o(x)o(emplar)f(\(see)h(e)o(xample\).)396
+4242 y Fu(Example.)g Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396
+4422 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 4519
+y(let)h(exemplar)222 b(=)45 b(new)f(exemplar_ext)f(data_impl)h(in)396
+4710 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e
+(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(data)g(nodes)
+g(are)g(created:)p Black 3797 5278 a Fr(57)p Black eop
+%%Page: 58 58
+58 57 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(let)45
+b(data_node)e(=)i(exemplar)e(#)i(create_data)e(dtd)h("The)g(characters)
+f(con-)396 676 y(tained)h(in)h(the)f(data)g(node")396
+867 y Fv(The)20 b Fq(data_node)f Fv(is)i(a)g(cop)o(y)e(of)h
+Fq(exemplar)p Fv(.)g(The)f(cop)o(y)h(is)h(initially)f(connected)e(with)
+j(the)f Fq(dtd)p Fv(,)g(and)f(it)i(is)h(\002lled)396
+975 y(with)f(character)e(material.)g(The)h Fq(data_node)f
+Fv(is)i(no)n(w)f(fully)g(functional;)e(it)j(can)f(be)g(added)f(to)h(an)
+h(element)e(as)i(child.)-2 1345 y Fp(3.2.5.)35 b(The)f(type)g
+Fc(spec)396 1512 y Fv(The)20 b(type)g Fq(spec)g Fv(de\002nes)g(a)g(w)o
+(ay)h(to)f(handle)f(the)h(details)h(of)f(creating)f(nodes)g(from)h(e)o
+(x)o(emplars.)396 1692 y Fq(type)44 b('ext)h(spec)396
+1790 y(constraint)e('ext)i(=)f('ext)g(node)h(#extension)396
+1984 y(val)g(make_spec_from_mapping)c(:)665 2081 y
+(?super_root_exemplar)h(:)i('ext)h(node)f(->)665 2178
+y(?comment_exemplar)e(:)j('ext)f(node)g(->)665 2275 y
+(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665
+2372 y(?pinstr_mapping)f(:)h(\(string,)g('ext)g(node\))g(Hashtbl.t)f
+(->)665 2469 y(data_exemplar:)g('ext)h(node)g(->)665
+2567 y(default_element_exemplar:)d('ext)j(node)g(->)665
+2664 y(element_mapping:)e(\(string,)i('ext)g(node\))g(Hashtbl.t)f(->)
+665 2761 y(unit)h(->)755 2858 y('ext)g(spec)396 3052
+y(val)h(make_spec_from_alist)c(:)665 3149 y(?super_root_exemplar)h(:)i
+('ext)h(node)f(->)665 3247 y(?comment_exemplar)e(:)j('ext)f(node)g(->)
+665 3344 y(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665
+3441 y(?pinstr_alist)f(:)i(\(string)e(*)i('ext)f(node\))g(list)g(->)665
+3538 y(data_exemplar:)f('ext)h(node)g(->)665 3635 y
+(default_element_exemplar:)d('ext)j(node)g(->)665 3732
+y(element_alist:)f(\(string)g(*)i('ext)f(node\))g(list)g(->)665
+3829 y(unit)g(->)755 3927 y('ext)g(spec)396 4117 y Fv(The)20
+b(tw)o(o)h(functions)d Fq(make_spec_from_mapping)f Fv(and)j
+Fq(make_spec_from_alist)d Fv(create)j Fq(spec)g Fv(v)n(alues.)396
+4225 y(Both)g(functions)f(are)h(functionally)e(equi)n(v)n(alent)h(and)g
+(the)i(only)e(dif)n(ference)f(is)j(that)g(the)f(\002rst)h(function)d
+(prefers)396 4333 y(hashtables)i(and)g(the)g(latter)g(associati)n(v)o
+(e)g(lists)h(to)g(describe)e(mappings)g(from)g(names)h(to)g(e)o(x)o
+(emplars.)396 4483 y(Y)-9 b(ou)20 b(can)g(specify)f(e)o(x)o(emplars)g
+(for)g(the)i(v)n(arious)e(kinds)g(of)h(nodes)g(that)g(need)g(to)g(be)g
+(generated)e(when)i(an)g(XML)396 4591 y(document)e(is)k(parsed:)p
+Black 3800 5278 a Fr(58)p Black eop
+%%Page: 59 59
+59 58 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p
+Black 60 w Fq(~super_root_exemplar)p Fv(:)e(This)i(e)o(x)o(emplar)e(is)
+j(used)f(to)h(create)f(the)g(super)f(root.)h(This)g(special)g(node)g
+(is)h(only)479 687 y(created)f(if)g(the)g(corresponding)d
+(con\002guration)h(option)h(has)h(been)g(selected;)g(it)h(is)g(the)f
+(parent)f(node)g(of)h(the)h(root)479 795 y(node)e(which)h(may)g(be)g
+(con)m(v)o(enient)d(if)k(e)n(v)o(ery)e(w)o(orking)f(node)i(must)g(ha)n
+(v)o(e)f(a)i(parent.)p Black 396 903 a Ft(\225)p Black
+60 w Fq(~comment_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar)d(is)j(used)f
+(when)f(a)i(comment)e(node)g(must)h(be)g(created.)g(Note)g(that)g(such)
+479 1011 y(nodes)g(are)g(only)f(created)h(if)g(the)g(corresponding)d
+(con\002guration)h(option)h(is)i("on".)p Black 396 1119
+a Ft(\225)p Black 60 w Fq(~default_pinstr_exemplar)p
+Fv(:)c(If)j(a)h(node)e(for)g(a)i(processing)e(instruction)g(must)h(be)g
+(created,)f(and)h(the)479 1226 y(instruction)f(is)i(not)f(listed)h(in)f
+(the)g(table)h(passed)f(by)f Fq(~pinstr_mapping)g Fv(or)h
+Fq(~pinstr_alist)p Fv(,)e(this)j(e)o(x)o(emplar)479 1334
+y(is)g(used.)f(Again)f(the)i(con\002guration)c(option)i(must)h(be)g
+("on")g(in)g(order)f(to)i(create)e(such)h(nodes)g(at)h(all.)p
+Black 396 1442 a Ft(\225)p Black 60 w Fq(~pinstr_mapping)e
+Fv(or)g Fq(~pinstr_alist)p Fv(:)g(Map)h(the)g(tar)o(get)g(names)f(of)h
+(processing)f(instructions)g(to)479 1550 y(e)o(x)o(emplars.)g(These)h
+(mappings)e(are)i(only)g(used)g(when)f(nodes)h(for)f(processing)g
+(instructions)g(are)h(created.)p Black 396 1658 a Ft(\225)p
+Black 60 w Fq(~data_exemplar)p Fv(:)f(The)h(e)o(x)o(emplar)e(for)h
+(ordinary)f(data)i(nodes.)p Black 396 1766 a Ft(\225)p
+Black 60 w Fq(~default_element_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar)
+e(is)k(used)e(if)g(an)g(element)g(node)f(must)h(be)g(created,)f(b)n(ut)
+i(the)479 1874 y(element)f(type)g(cannot)f(be)h(found)e(in)j(the)f
+(tables)g Fq(element_mapping)e Fv(or)i Fq(element_alist)p
+Fv(.)p Black 396 1982 a Ft(\225)p Black 60 w Fq(~element_mapping)e
+Fv(or)i Fq(~element_alist)p Fv(:)f(Map)h(the)g(element)f(types)h(to)h
+(e)o(x)o(emplars.)d(These)i(mappings)f(are)479 2090 y(used)h(to)h
+(create)e(element)h(nodes.)396 2239 y(In)g(most)g(cases,)h(you)e(only)h
+(w)o(ant)g(to)g(create)g Fq(spec)g Fv(v)n(alues)g(to)h(pass)f(them)g
+(to)g(the)h(parser)e(functions)g(found)f(in)396 2347
+y Fq(Pxp_yacc)p Fv(.)h(Ho)n(we)n(v)o(er)m(,)f(it)j(might)f(be)g(useful)
+g(to)g(apply)f Fq(spec)h Fv(v)n(alues)g(directly)-5 b(.)396
+2497 y(The)20 b(follo)n(wing)f(functions)f(create)i(v)n(arious)f(types)
+h(of)g(nodes)g(by)g(selecting)f(the)i(corresponding)16
+b(e)o(x)o(emplar)j(from)g(the)396 2605 y(passed)h Fq(spec)g
+Fv(v)n(alue,)g(and)f(by)h(calling)g Fq(create_element)e
+Fv(or)i Fq(create_data)f Fv(on)h(the)g(e)o(x)o(emplar)-5
+b(.)396 2785 y Fq(val)45 b(create_data_node)d(:)665 2882
+y('ext)i(spec)h(->)665 2979 y(dtd)g(->)665 3076 y(\(*)g(data)f
+(material:)f(*\))i(string)f(->)845 3173 y('ext)g(node)396
+3368 y(val)h(create_element_node)c(:)665 3465 y(?position:\(string)h(*)
+j(int)f(*)h(int\))f(->)665 3562 y('ext)g(spec)h(->)665
+3659 y(dtd)g(->)665 3756 y(\(*)g(element)e(type:)h(*\))h(string)f(->)
+665 3853 y(\(*)h(attributes:)e(*\))h(\(string)g(*)h(string\))e(list)h
+(->)845 3950 y('ext)g(node)396 4145 y(val)h(create_super_root_node)c(:)
+665 4242 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665
+4339 y('ext)g(spec)h(->)710 4436 y(dtd)f(->)889 4533
+y('ext)h(node)396 4728 y(val)g(create_comment_node)c(:)665
+4825 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)p
+Black 3800 5278 a Fr(59)p Black eop
+%%Page: 60 60
+60 59 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 665 579 a Fq('ext)44
+b(spec)h(->)665 676 y(dtd)g(->)665 773 y(\(*)g(comment)e(text:)h(*\))h
+(string)f(->)845 870 y('ext)g(node)396 1065 y(val)h(create_pinstr_node)
+c(:)665 1162 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665
+1259 y('ext)g(spec)h(->)665 1356 y(dtd)g(->)665 1453
+y(proc_instruction)d(->)845 1550 y('ext)i(node)-2 2003
+y Fp(3.2.6.)35 b(Examples)396 2171 y Fu(Building)22 b(tr)o(ees.)d
+Fv(Here)h(is)h(the)g(piece)e(of)h(code)g(that)g(creates)g(the)h(tree)f
+(of)g(the)g(\002gure)f Fr(A)i(tr)m(ee)g(with)f(element)g(nodes,)396
+2279 y(data)g(nodes,)f(and)g(attrib)n(utes)p Fv(.)h(The)g(e)o(xtension)
+f(object)h(and)f(the)h(DTD)h(are)f(be)o(yond)e(the)i(scope)g(of)g(this)
+g(e)o(xample.)396 2459 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(\(*)h
+(some)f(extension)f(*\))i(in)396 2556 y(let)g(dtd)f(=)h(...)f(\(*)g
+(some)h(DTD)f(*\))g(in)396 2750 y(let)h(element_exemplar)d(=)i(new)h
+(element_impl)e(exemplar_ext)f(in)396 2847 y(let)j(data_exemplar)177
+b(=)44 b(new)h(data_impl)178 b(exemplar_ext)42 b(in)396
+3042 y(let)j(a1)f(=)h(element_exemplar)d(#)j(cre-)396
+3139 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("apple"])396
+3236 y(and)h(b1)f(=)h(element_exemplar)d(#)j(create_element)d(dtd)i
+(\(T_element)g("b"\))g([])396 3333 y(and)h(c1)f(=)h(element_exemplar)d
+(#)j(create_element)d(dtd)i(\(T_element)g("c"\))g([])396
+3430 y(and)h(a2)f(=)h(element_exemplar)d(#)j(cre-)396
+3527 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("orange"])
+396 3624 y(in)396 3819 y(let)h(cherries)e(=)i(data_exemplar)d(#)j
+(create_data)e(dtd)h("Cherries")g(in)396 3916 y(let)h(orange)133
+b(=)45 b(data_exemplar)d(#)j(create_data)e(dtd)h("An)h(orange")e(in)396
+4110 y(a1)i(#)f(add_node)g(b1;)396 4207 y(a1)h(#)f(add_node)g(c1;)396
+4304 y(b1)h(#)f(add_node)g(a2;)396 4401 y(b1)h(#)f(add_node)g
+(cherries;)396 4499 y(a2)h(#)f(add_node)g(orange;)396
+4689 y Fv(Alternati)n(v)o(ely)-5 b(,)18 b(the)i(last)h(block)f(of)g
+(statements)g(could)f(also)i(be)f(written)g(as:)396 4870
+y Fq(a1)45 b(#)f(set_nodes)g([b1;)g(c1];)p Black 3800
+5278 a Fr(60)p Black eop
+%%Page: 61 61
+61 60 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(b1)45
+b(#)f(set_nodes)g([a2;)g(cherries];)396 676 y(a2)h(#)f(set_nodes)g
+([orange];)396 867 y Fv(The)20 b(root)g(of)g(the)g(tree)g(is)h
+Fq(a1)p Fv(,)f(i.e.)g(it)h(is)g(true)f(that)396 1047
+y Fq(x)45 b(#)g(root)f(==)g(a1)396 1238 y Fv(for)20 b(e)n(v)o(ery)f(x)h
+(from)f({)i Fq(a1)p Fv(,)f Fq(a2)p Fv(,)g Fq(b1)p Fv(,)g
+Fq(c1)p Fv(,)g Fq(cherries)p Fv(,)g Fq(orange)f Fv(}.)396
+1388 y(Furthermore,)f(the)i(follo)n(wing)f(properties)f(hold:)486
+1568 y Fq(a1)44 b(#)h(attribute)e("att")h(=)h(Value)f("apple")396
+1665 y(&)h(a2)f(#)h(attribute)e("att")h(=)h(Value)f("orange")396
+1859 y(&)h(cherries)e(#)i(data)f(=)h("Cherries")396 1956
+y(&)135 b(orange)43 b(#)i(data)f(=)h("An)f(orange")396
+2053 y(&)314 b(a1)44 b(#)h(data)f(=)h("CherriesAn)e(orange")396
+2248 y(&)314 b(a1)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2345 y(&)314 b(a2)44 b(#)h(node_type)e(=)i(T_element)e("a")396
+2442 y(&)314 b(b1)44 b(#)h(node_type)e(=)i(T_element)e("b")396
+2539 y(&)314 b(c1)44 b(#)h(node_type)e(=)i(T_element)e("c")396
+2636 y(&)i(cherries)e(#)i(node_type)e(=)i(T_data)396
+2733 y(&)135 b(orange)43 b(#)i(node_type)e(=)i(T_data)396
+2928 y(&)314 b(a1)44 b(#)h(sub_nodes)e(=)i([)g(b1;)f(c1)h(])396
+3025 y(&)314 b(a2)44 b(#)h(sub_nodes)e(=)i([)g(orange)f(])396
+3122 y(&)314 b(b1)44 b(#)h(sub_nodes)e(=)i([)g(a2;)f(cherries)g(])396
+3219 y(&)314 b(c1)44 b(#)h(sub_nodes)e(=)i([])396 3316
+y(&)g(cherries)e(#)i(sub_nodes)e(=)i([])396 3413 y(&)135
+b(orange)43 b(#)i(sub_nodes)e(=)i([])396 3608 y(&)314
+b(a2)44 b(#)h(parent)f(==)g(a1)396 3705 y(&)314 b(b1)44
+b(#)h(parent)f(==)g(b1)396 3802 y(&)314 b(c1)44 b(#)h(parent)f(==)g(a1)
+396 3899 y(&)h(cherries)e(#)i(parent)f(==)g(b1)396 3996
+y(&)135 b(orange)43 b(#)i(parent)f(==)g(a2)396 4229 y
+Fu(Sear)o(ching)19 b(nodes.)g Fv(The)g(follo)n(wing)e(function)h
+(searches)h(all)g(nodes)g(of)g(a)g(tree)h(for)e(which)h(a)g(certain)g
+(condition)e(holds:)396 4409 y Fq(let)45 b(rec)f(search)g(p)g(t)h(=)486
+4506 y(if)f(p)h(t)g(then)576 4603 y(t)f(::)h(search_list)e(p)h(\(t)h(#)
+g(sub_nodes\))486 4700 y(else)576 4797 y(search_list)e(p)h(\(t)h(#)f
+(sub_nodes\))p Black 3800 5278 a Fr(61)p Black eop
+%%Page: 62 62
+62 61 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 676 a Fq(and)45
+b(search_list)e(p)h(l)h(=)486 773 y(match)f(l)h(with)576
+870 y([])268 b(-)p Fo(>)45 b Fq([])486 967 y(|)g(t)f(::)h(l')f(-)p
+Fo(>)h Fq(\(search)e(p)i(t\))f(@)h(\(search_list)e(p)i(l'\))396
+1065 y(;;)396 1297 y Fv(F)o(or)20 b(e)o(xample,)f(if)h(you)f(w)o(ant)i
+(to)f(search)g(all)h(elements)f(of)f(a)i(certain)f(type)f
+Fq(et)p Fv(,)i(the)f(function)e Fq(search)i Fv(can)g(be)g(applied)396
+1405 y(as)h(follo)n(ws:)396 1585 y Fq(let)45 b(search_element_type)c
+(et)k(t)f(=)486 1682 y(search)g(\(fun)g(x)h(-)p Fo(>)f
+Fq(x)h(#)f(node_type)g(=)g(T_element)g(et\))g(t)396 1779
+y(;;)396 2012 y Fu(Getting)20 b(attrib)n(ute)f(v)o(alues.)h
+Fv(Suppose)f(we)i(ha)n(v)o(e)f(the)g(declaration:)396
+2192 y Fq(<!ATTLIST)44 b(e)g(a)h(CDATA)f(#REQUIRED)934
+2289 y(b)h(CDATA)f(#IMPLIED)934 2386 y(c)h(CDATA)f("12345">)396
+2577 y Fv(In)20 b(this)h(case,)f(e)n(v)o(ery)f(element)h
+Fq(e)g Fv(must)h(ha)n(v)o(e)e(an)h(attrib)n(ute)g Fq(a)p
+Fv(,)g(otherwise)g(the)g(parser)g(w)o(ould)f(indicate)h(an)g(error)-5
+b(.)19 b(If)h(the)396 2685 y(O'Caml)h(v)n(ariable)e Fq(n)h
+Fv(holds)g(the)g(node)f(of)h(the)g(tree)h(corresponding)16
+b(to)21 b(the)f(element,)f(you)g(can)h(get)h(the)f(v)n(alue)f(of)h(the)
+396 2793 y(attrib)n(ute)g Fq(a)h Fv(by)396 2973 y Fq(let)45
+b(value_of_a)e(=)h(n)h(#)g(required_string_attribute)40
+b("a")396 3164 y Fv(which)20 b(is)h(more)e(or)h(less)i(an)e(abbre)n
+(viation)d(for)396 3344 y Fq(let)45 b(value_of_a)e(=)486
+3442 y(match)h(n)h(#)f(attribute)g("a")g(with)576 3539
+y(Value)g(s)g(->)h(s)486 3636 y(|)g(_)313 b(->)45 b(assert)f(false)396
+3827 y Fv(-)21 b(as)g(the)f(attrib)n(ute)g(is)h(required,)d(the)i
+Fq(attribute)f Fv(method)g(al)o(w)o(ays)i(returns)e(a)i
+Fq(Value)p Fv(.)396 3976 y(In)f(contrast)g(to)g(this,)h(the)f(attrib)n
+(ute)g Fq(b)g Fv(can)g(be)g(omitted.)g(In)f(this)i(case,)g(the)f
+(method)396 4084 y Fq(required_string_attribute)d Fv(w)o(orks)j(only)f
+(if)h(the)h(attrib)n(ute)f(is)h(there,)e(and)h(the)g(method)f(will)i(f)
+o(ail)f(if)h(the)396 4192 y(attrib)n(ute)f(is)h(missing.)f(T)-7
+b(o)20 b(get)h(the)f(v)n(alue,)f(you)g(can)h(apply)g(the)g(method)f
+Fq(optional_string_attribute)p Fv(:)396 4372 y Fq(let)45
+b(value_of_b)e(=)h(n)h(#)g(optional_string_attribute)40
+b("b")396 4563 y Fv(No)n(w)-5 b(,)20 b Fq(value_of_b)f
+Fv(is)i(of)f(type)g Fq(string)43 b(option)p Fv(,)20 b(and)f
+Fq(None)i Fv(represents)e(the)h(omitted)g(attrib)n(ute.)f(Alternati)n
+(v)o(ely)-5 b(,)396 4671 y(you)20 b(could)f(also)h(use)h
+Fq(attribute)p Fv(:)396 4851 y Fq(let)45 b(value_of_b)e(=)p
+Black 3800 5278 a Fr(62)p Black eop
+%%Page: 63 63
+63 62 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(match)44
+b(n)h(#)f(attribute)g("b")g(with)576 676 y(Value)g(s)313
+b(->)45 b(Some)f(s)486 773 y(|)h(Implied_value)d(->)j(None)486
+870 y(|)g(_)582 b(->)45 b(assert)f(false)396 1103 y Fv(The)20
+b(attrib)n(ute)g Fq(c)h Fv(beha)n(v)o(es)e(much)g(lik)o(e)h
+Fq(a)p Fv(,)h(because)e(it)i(has)g(al)o(w)o(ays)f(a)h(v)n(alue.)e(If)h
+(the)g(attrib)n(ute)g(is)h(omitted,)f(the)g(def)o(ault,)396
+1211 y(here)g("12345",)e(will)j(be)f(returned)e(instead.)i(Because)g
+(of)g(this,)h(you)e(can)h(again)f(use)396 1319 y Fq
+(required_string_attribute)e Fv(to)j(get)g(the)h(v)n(alue.)396
+1468 y(The)f(type)g Fq(CDATA)g Fv(is)h(the)f(most)g(general)f(string)h
+(type.)g(The)g(types)g Fq(NMTOKEN)p Fv(,)f Fq(ID)p Fv(,)h
+Fq(IDREF)p Fv(,)g Fq(ENTITY)p Fv(,)f(and)h(all)396 1576
+y(enumerators)e(and)i(notations)f(are)h(special)h(forms)e(of)h(string)g
+(types)g(that)g(restrict)g(the)h(possible)f(v)n(alues.)f(From)396
+1684 y(O'Caml,)h(the)o(y)g(beha)n(v)o(e)f(lik)o(e)h Fq(CDATA)p
+Fv(,)g(i.e.)g(you)f(can)h(use)h(the)f(methods)f Fq
+(required_string_attribute)e Fv(and)396 1792 y Fq
+(optional_string_attribute)p Fv(,)g(too.)396 1941 y(In)j(contrast)g(to)
+g(this,)h(the)f(types)g Fq(NMTOKENS)p Fv(,)f Fq(IDREFS)p
+Fv(,)g(and)h Fq(ENTITIES)g Fv(mean)f(lists)j(of)e(strings.)g(Suppose)f
+(we)h(ha)n(v)o(e)396 2049 y(the)g(declaration:)396 2229
+y Fq(<!ATTLIST)44 b(f)g(d)h(NMTOKENS)e(#REQUIRED)934
+2327 y(e)i(NMTOKENS)e(#IMPLIED>)396 2517 y Fv(The)20
+b(type)g Fq(NMTOKENS)f Fv(stands)i(for)e(lists)j(of)e(space-separated)e
+(tok)o(ens;)i(for)f(e)o(xample)g(the)h(v)n(alue)g Fq("1)44
+b(abc)h(23ef")396 2625 y Fv(means)20 b(the)g(list)i Fq(["1";)44
+b("abc";)f("23ef"])p Fv(.)20 b(\(Again,)e Fq(IDREFS)i
+Fv(and)g Fq(ENTITIES)f Fv(ha)n(v)o(e)h(more)f(restricted)h(v)n
+(alues.\))396 2733 y(T)-7 b(o)21 b(get)f(the)g(v)n(alue)g(of)f(attrib)n
+(ute)h Fq(d)p Fv(,)h(one)e(can)h(use)396 2913 y Fq(let)45
+b(value_of_d)e(=)h(n)h(#)g(required_list_attribute)c("d")396
+3104 y Fv(or)396 3285 y Fq(let)k(value_of_d)e(=)486 3382
+y(match)h(n)h(#)f(attribute)g("d")g(with)576 3479 y(Valuelist)f(l)i(->)
+f(l)486 3576 y(|)h(_)493 b(->)44 b(assert)g(false)396
+3767 y Fv(As)21 b Fq(d)g Fv(is)g(required,)d(the)i(attrib)n(ute)g
+(cannot)f(be)h(omitted,)g(and)f(the)h Fq(attribute)g
+Fv(method)e(returns)i(al)o(w)o(ays)g(a)396 3875 y Fq(Valuelist)p
+Fv(.)396 4024 y(F)o(or)g(optional)f(attrib)n(utes)h(lik)o(e)h
+Fq(e)p Fv(,)f(apply)396 4204 y Fq(let)45 b(value_of_e)e(=)h(n)h(#)g
+(optional_list_attribute)c("e")396 4395 y Fv(or)396 4576
+y Fq(let)k(value_of_e)e(=)486 4673 y(match)h(n)h(#)f(attribute)g("e")g
+(with)576 4770 y(Valuelist)f(l)134 b(->)45 b(l)486 4867
+y(|)g(Implied_value)d(->)j([])p Black 3800 5278 a Fr(63)p
+Black eop
+%%Page: 64 64
+64 63 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 486 579 a Fq(|)45
+b(_)582 b(->)45 b(assert)f(false)396 770 y Fv(Here,)20
+b(the)g(case)h(that)f(the)g(attrib)n(ute)g(is)h(missing)f(counts)g(lik)
+o(e)g(the)h(empty)e(list.)-2 1139 y Fp(3.2.7.)35 b(Iterator)n(s)396
+1307 y Fv(There)20 b(are)g(also)g(se)n(v)o(eral)g(iterators)g(in)g
+(Pxp_document;)d(please)j(see)h(the)f(mli)h(\002le)f(for)g(details.)g
+(Y)-9 b(ou)20 b(can)g(\002nd)396 1415 y(e)o(xamples)f(for)h(them)g(in)g
+(the)g("simple_transformation")d(directory)-5 b(.)396
+1595 y Fq(val)45 b(find)f(:)g(?deeply:bool)f(->)889 1692
+y(f:\('ext)h(node)g(->)h(bool\))f(->)g('ext)g(node)h(->)f('ext)g(node)
+396 1887 y(val)h(find_all)e(:)i(?deeply:bool)e(->)1069
+1984 y(f:\('ext)g(node)i(->)f(bool\))g(->)h('ext)f(node)g(->)g('ext)h
+(node)f(list)396 2178 y(val)h(find_element)d(:)j(?deeply:bool)e(->)1248
+2275 y(string)h(->)g('ext)h(node)f(->)g('ext)g(node)396
+2469 y(val)h(find_all_elements)d(:)i(?deeply:bool)f(->)1472
+2567 y(string)h(->)h('ext)f(node)g(->)g('ext)h(node)f(list)396
+2761 y(exception)g(Skip)396 2858 y(val)h(map_tree)e(:)90
+b(pre:\('exta)43 b(node)h(->)g('extb)g(node\))g(->)1069
+2955 y(?post:\('extb)f(node)h(->)g('extb)g(node\))g(->)1069
+3052 y('exta)g(node)g(->)1248 3149 y('extb)g(node)396
+3441 y(val)h(map_tree_sibl)d(:)755 3538 y(pre:)i(\('exta)g(node)g
+(option)g(->)g('exta)g(node)h(->)f('exta)g(node)g(option)g(->)1203
+3635 y('extb)g(node\))g(->)710 3732 y(?post:\('extb)f(node)h(option)g
+(->)g('extb)g(node)h(->)f('extb)g(node)g(option)g(->)1203
+3829 y('extb)g(node\))g(->)710 3927 y('exta)g(node)g(->)889
+4024 y('extb)g(node)396 4218 y(val)h(iter_tree)e(:)i(?pre:\('ext)e
+(node)h(->)g(unit\))g(->)1114 4315 y(?post:\('ext)f(node)h(->)g(unit\))
+g(->)1114 4412 y('ext)g(node)g(->)1293 4509 y(unit)396
+4704 y(val)h(iter_tree_sibl)d(:)710 4801 y(?pre:)i(\('ext)g(node)g
+(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g(unit\))g
+(->)p Black 3800 5278 a Fr(64)p Black eop
+%%Page: 65 65
+65 64 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 710 579 a Fq(?post:\('ext)43
+b(node)h(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g
+(unit\))g(->)710 676 y('ext)g(node)g(->)889 773 y(unit)-2
+1358 y Fx(3.3.)39 b(The)g(c)m(lass)g(type)g Fb(extension)396
+1610 y Fq(class)44 b(type)g([)h('node)f(])h(extension)e(=)486
+1707 y(object)h(\('self\))576 1804 y(method)f(clone)h(:)h('self)665
+1901 y(\(*)g("clone")e(should)h(return)g(an)h(exact)f(deep)g(copy)g(of)
+g(the)h(object.)e(*\))576 1998 y(method)g(node)i(:)f('node)665
+2095 y(\(*)h("node")f(returns)f(the)i(corresponding)d(node)i(of)h(this)
+f(extension.)f(This)h(method)710 2193 y(*)h(intended)e(to)i(return)f
+(exactly)f(what)h(previ-)396 2290 y(ously)g(has)h(been)f(set)g(by)h
+("set_node".)710 2387 y(*\))576 2484 y(method)e(set_node)h(:)h('node)f
+(->)g(unit)665 2581 y(\(*)h("set_node")e(is)h(invoked)g(once)g(the)h
+(extension)e(is)h(associated)g(to)g(a)h(new)710 2678
+y(*)g(node)f(object.)710 2775 y(*\))486 2873 y(end)396
+3063 y Fv(This)21 b(is)g(the)f(type)g(of)g(classes)h(used)f(for)f(node)
+h(e)o(xtensions.)e(F)o(or)i(e)n(v)o(ery)f(node)g(of)h(the)g(document)e
+(tree,)i(there)g(is)h(not)396 3171 y(only)f(the)g Fq(node)g
+Fv(object,)f(b)n(ut)h(also)g(an)g Fq(extension)f Fv(object.)h(The)f
+(latter)i(has)f(minimal)f(functionality;)f(it)j(has)f(only)g(the)396
+3279 y(necessary)g(methods)f(to)h(be)g(attached)g(to)g(the)g(node)f
+(object)h(containing)e(the)j(details)f(of)g(the)g(node)f(instance.)h
+(The)396 3387 y(e)o(xtension)f(object)h(is)h(called)f(e)o(xtension)f
+(because)g(its)i(purpose)e(is)i(e)o(xtensibility)-5 b(.)396
+3537 y(F)o(or)20 b(some)g(reasons,)g(it)h(is)g(impossible)e(to)i(deri)n
+(v)o(e)d(the)j Fq(node)f Fv(classes)h(\(i.e.)f Fq(element_impl)f
+Fv(and)g Fq(data_impl)p Fv(\))g(such)396 3645 y(that)i(the)f
+(subclasses)g(can)g(be)g(e)o(xtended)f(by)g(ne)n(w)h(ne)n(w)g(methods.)
+f(But)i(subclassing)f(nodes)f(is)i(a)g(great)f(feature,)396
+3753 y(because)g(it)h(allo)n(ws)f(the)g(user)g(to)h(pro)o(vide)d(dif)n
+(ferent)g(classes)k(for)d(dif)n(ferent)g(types)h(of)g(nodes.)f(The)h(e)
+o(xtension)f(objects)396 3860 y(are)h(a)h(w)o(orkaround)c(that)j(is)i
+(as)e(po)n(werful)f(as)i(direct)f(subclassing,)f(the)h(costs)h(are)f
+(some)g(notation)f(o)o(v)o(erhead.)p Black 3800 5278
+a Fr(65)p Black eop
+%%Page: 66 66
+66 65 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-6.)f
+(The)i(structur)o(e)f(of)g(nodes)g(and)h(extensions)396
+1928 y
+ currentpoint currentpoint translate 1 1 scale neg exch neg exch translate
+ 396 1928 a @beginspecial 0 @llx 0 @lly 206 @urx
+140 @ury 2060 @rwi @setspecial
+%%BeginDocument: pic/extension_general.ps
+%!PS-Adobe-2.0 EPSF-2.0
+%%Title: src/pic/extension_general.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 1
+%%CreationDate: Sun Aug 27 02:05:42 2000
+%%For: gerd@ice (Gerd Stolpmann)
+%%Orientation: Portrait
+%%BoundingBox: 0 0 206 140
+%%Pages: 0
+%%BeginSetup
+%%EndSetup
+%%Magnification: 0.8000
+%%EndComments
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+save
+-22.0 205.0 translate
+1 -1 scale
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+ /DrawEllipse {
+ /endangle exch def
+ /startangle exch def
+ /yrad exch def
+ /xrad exch def
+ /y exch def
+ /x exch def
+ /savematrix mtrx currentmatrix def
+ x y tr xrad yrad sc 0 0 1 startangle endangle arc
+ closepath
+ savematrix setmatrix
+ } def
+
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+%%EndProlog
+
+$F2psBegin
+10 setmiterlimit
+n -1000 5050 m -1000 -1000 l 5514 -1000 l 5514 5050 l cp clip
+ 0.05039 0.05039 sc
+7.500 slw
+% Ellipse
+n 1575 2250 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 1575 3375 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 675 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2475 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3600 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 2880 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 4320 2475 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Ellipse
+n 3600 1485 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr
+
+% Polyline
+n 675 3150 m 1395 2385 l gs col0 s gr
+% Polyline
+n 1575 2475 m 1575 3150 l gs col0 s gr
+% Polyline
+n 1755 2385 m 2475 3150 l gs col0 s gr
+% Polyline
+ [60] 0 sd
+gs clippath
+3288 1467 m 3412 1462 l 3305 1524 l 3435 1487 l 3418 1429 l cp
+clip
+n 1537 2010 m 3412 1462 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 3288 1467 m 3412 1462 l 3305 1524 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+1796 2042 m 1672 2047 l 1779 1984 l 1649 2022 l 1666 2080 l cp
+clip
+n 3412 1537 m 1672 2047 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 1796 2042 m 1672 2047 l 1779 1984 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+2584 2524 m 2707 2512 l 2604 2581 l 2731 2535 l 2711 2479 l cp
+933 3183 m 810 3195 l 913 3126 l 786 3172 l 806 3228 l cp
+clip
+n 810 3195 m 2707 2512 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 933 3183 m 810 3195 l 913 3126 l col0 s
+% arrowhead
+n 2584 2524 m 2707 2512 l 2604 2581 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+3319 2594 m 3442 2580 l 3340 2650 l 3467 2603 l 3446 2547 l cp
+1863 3203 m 1740 3217 l 1842 3147 l 1715 3194 l 1736 3250 l cp
+clip
+n 1740 3217 m 3442 2580 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 1863 3203 m 1740 3217 l 1842 3147 l col0 s
+% arrowhead
+n 3319 2594 m 3442 2580 l 3340 2650 l col0 s
+% Polyline
+ [60] 0 sd
+gs clippath
+4054 2626 m 4177 2610 l 4076 2682 l 4202 2632 l 4180 2577 l cp
+2763 3194 m 2640 3210 l 2741 3138 l 2615 3188 l 2637 3243 l cp
+clip
+n 2640 3210 m 4177 2610 l gs col0 s gr gr
+ [] 0 sd
+% arrowhead
+n 2763 3194 m 2640 3210 l 2741 3138 l col0 s
+% arrowhead
+n 4054 2626 m 4177 2610 l 4076 2682 l col0 s
+/Courier-Bold ff 180.00 scf sf
+3555 1530 m
+gs 1 -1 sc (x) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+1530 2295 m
+gs 1 -1 sc (n) col0 sh gr
+/Courier ff 180.00 scf sf
+1658 1950 m
+gs 1 -1 sc 17.0 rot (n # extension) col0 sh gr
+/Courier ff 180.00 scf sf
+2475 1950 m
+gs 1 -1 sc 17.0 rot (x # node) col0 sh gr
+/Helvetica ff 180.00 scf sf
+1020 4050 m
+gs 1 -1 sc (The node tree) col0 sh gr
+/Helvetica ff 180.00 scf sf
+3225 3285 m
+gs 1 -1 sc (The extensions) col0 sh gr
+$F2psEnd
+rs
+
+%%EndDocument
+ @endspecial 396 1928 a
+ currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch
+neg exch translate
+ 396 1928 a 357 x Fv(The)f(picture)f(sho)n(ws)i
+(ho)n(w)e(the)i(nodes)e(and)h(e)o(xtensions)f(are)h(link)o(ed)f
+(together)-5 b(.)19 b(Ev)o(ery)g(node)g(has)i(a)f(reference)f(to)h(its)
+396 2393 y(e)o(xtension,)f(and)g(e)n(v)o(ery)g(e)o(xtension)g(has)h(a)h
+(reference)d(to)j(its)g(node.)e(The)h(methods)f Fq(extension)g
+Fv(and)h Fq(node)g Fv(follo)n(w)396 2501 y(these)h(references;)e(a)h
+(typical)g(phrase)f(is)396 2681 y Fq(self)44 b(#)h(node)f(#)h
+(attribute)e("xy")396 2872 y Fv(to)21 b(get)f(the)g(v)n(alue)g(of)f(an)
+i(attrib)n(ute)e(from)h(a)g(method)f(de\002ned)g(in)h(the)h(e)o
+(xtension)d(object;)i(or)396 3053 y Fq(self)44 b(#)h(node)f(#)h(iter)
+486 3150 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)f(extension)g(#)g(my_method)
+g(...\))396 3341 y Fv(to)21 b(iterate)f(o)o(v)o(er)f(the)h(subnodes)f
+(and)g(to)i(call)f Fq(my_method)f Fv(of)h(the)h(corresponding)16
+b(e)o(xtension)j(objects.)396 3490 y(Note)h(that)h(e)o(xtension)d
+(objects)i(do)g(not)g(ha)n(v)o(e)g(references)e(to)j(subnodes)e(\(or)g
+("sube)o(xtensions"\))f(themselv)o(es;)h(in)i(order)396
+3598 y(to)g(get)f(one)f(of)h(the)h(children)d(of)i(an)g(e)o(xtension)f
+(you)g(must)i(\002rst)g(go)e(to)i(the)f(node)f(object,)h(then)f(get)h
+(the)h(child)e(node,)396 3706 y(and)h(\002nally)g(reach)f(the)i(e)o
+(xtension)d(that)j(is)g(logically)e(the)h(child)g(of)g(the)g(e)o
+(xtension)f(you)g(started)h(with.)-2 4034 y Fp(3.3.1.)35
+b(Ho)n(w)f(to)f(de\002ne)h(an)g(e)n(xtension)i(c)n(lass)396
+4202 y Fv(At)21 b(minimum,)e(you)g(must)h(de\002ne)g(the)g(methods)f
+Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)f Fq(set_node)h
+Fv(such)f(that)i(your)e(class)i(is)396 4310 y(compatible)e(with)h(the)h
+(type)e Fq(extension)p Fv(.)g(The)h(method)f Fq(set_node)g
+Fv(is)i(called)f(during)f(the)h(initialization)g(of)g(the)396
+4418 y(node,)f(or)h(after)g(a)h(node)e(has)h(been)g(cloned;)f(the)h
+(node)f(object)h(in)m(v)n(ok)o(es)f Fq(set_node)g Fv(on)h(the)g(e)o
+(xtension)f(object)h(to)g(tell)396 4526 y(it)h(that)f(this)h(node)e(is)
+i(no)n(w)f(the)g(object)g(the)g(e)o(xtension)f(is)i(link)o(ed)f(to.)g
+(The)f(e)o(xtension)g(must)h(return)f(the)i(node)e(object)396
+4633 y(passed)h(as)h(ar)o(gument)d(of)i Fq(set_node)f
+Fv(when)h(the)g Fq(node)g Fv(method)f(is)i(called.)p
+Black 3798 5278 a Fr(66)p Black eop
+%%Page: 67 67
+67 66 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(The)g
+Fq(clone)g Fv(method)f(must)h(return)f(a)i(cop)o(y)e(of)h(the)g(e)o
+(xtension)f(object;)h(at)g(least)h(the)f(object)g(itself)h(must)f(be)
+396 687 y(duplicated,)f(b)n(ut)h(if)g(required,)e(the)j(cop)o(y)e
+(should)g(deeply)g(duplicate)g(all)i(objects)f(and)g(v)n(alues)g(that)g
+(are)g(referred)e(by)396 795 y(the)i(e)o(xtension,)f(too.)h(Whether)f
+(this)i(is)g(required,)d(depends)h(on)h(the)g(application;)f
+Fq(clone)h Fv(is)h(in)m(v)n(ok)o(ed)d(by)i(the)g(node)396
+903 y(object)g(when)g(one)f(of)h(its)h(cloning)e(methods)g(is)i
+(called.)396 1052 y(A)g(good)e(starting)h(point)f(for)h(an)g(e)o
+(xtension)e(class:)396 1232 y Fq(class)44 b(custom_extension)e(=)486
+1329 y(object)i(\(self\))576 1524 y(val)g(mutable)g(node)g(=)g(\(None)g
+(:)h(custom_extension)d(node)i(option\))576 1718 y(method)f(clone)h(=)h
+({<)g(>})576 1912 y(method)e(node)i(=)665 2009 y(match)f(node)g(with)
+845 2107 y(None)g(->)934 2204 y(assert)g(false)755 2301
+y(|)h(Some)f(n)g(->)h(n)576 2495 y(method)e(set_node)h(n)h(=)665
+2592 y(node)f(<-)h(Some)f(n)486 2786 y(end)396 2977 y
+Fv(This)21 b(class)g(is)g(compatible)e(with)h Fq(extension)p
+Fv(.)f(The)h(purpose)e(of)i(de\002ning)f(such)h(a)h(class)g(is,)g(of)f
+(course,)f(adding)396 3085 y(further)g(methods;)g(and)h(you)f(can)h(do)
+g(it)h(without)e(restriction.)396 3235 y(Often,)h(you)f(w)o(ant)h(not)g
+(only)g(one)f(e)o(xtension)g(class.)i(In)f(this)h(case,)f(it)h(is)g
+(the)f(simplest)h(w)o(ay)f(that)g(all)h(your)e(classes)i(\(for)396
+3343 y(one)f(kind)f(of)h(document\))e(ha)n(v)o(e)i(the)g(same)g(type)g
+(\(with)g(respect)g(to)g(the)g(interf)o(ace;)g(i.e.)g(it)h(does)f(not)g
+(matter)g(if)g(your)396 3451 y(classes)i(dif)n(fer)d(in)h(the)g
+(de\002ned)f(pri)n(v)n(ate)h(methods)f(and)g(instance)h(v)n(ariables,)f
+(b)n(ut)h(public)g(methods)f(count\).)f(This)396 3559
+y(approach)g(a)n(v)n(oids)i(lots)h(of)f(coercions)f(and)h(problems)e
+(with)j(type)f(incompatibilities.)e(It)j(is)g(simple)f(to)g(implement:)
+396 3739 y Fq(class)44 b(custom_extension)e(=)486 3836
+y(object)i(\(self\))576 3933 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h
+(custom_extension)d(node)i(option\))576 4127 y(method)f(clone)h(=)h
+(...)269 b(\(*)44 b(see)g(above)g(*\))576 4224 y(method)f(node)i(=)f
+(...)314 b(\(*)44 b(see)g(above)g(*\))576 4322 y(method)f(set_node)h(n)
+h(=)f(...)h(\(*)f(see)g(above)g(*\))576 4516 y(method)f(virtual)h
+(my_method1)f(:)i(...)576 4613 y(method)e(virtual)h(my_method2)f(:)i
+(...)576 4710 y(...)f(\(*)g(etc.)h(*\))486 4807 y(end)p
+Black 3797 5278 a Fr(67)p Black eop
+%%Page: 68 68
+68 67 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 676 a Fq(class)44
+b(custom_extension_kind_A)d(=)486 773 y(object)j(\(self\))576
+870 y(inherit)f(custom_extension)576 1065 y(method)g(my_method1)h(=)g
+(...)576 1162 y(method)f(my_method2)h(=)g(...)486 1259
+y(end)396 1453 y(class)g(custom_extension_kind_B)d(=)486
+1550 y(object)j(\(self\))576 1647 y(inherit)f(custom_extension)576
+1842 y(method)g(my_method1)h(=)g(...)576 1939 y(method)f(my_method2)h
+(=)g(...)486 2036 y(end)396 2227 y Fv(If)20 b(a)h(class)g(does)f(not)g
+(need)f(a)i(method)e(\(e.g.)g(because)h(it)h(does)e(not)h(mak)o(e)g
+(sense,)g(or)g(it)h(w)o(ould)f(violate)f(some)396 2335
+y(important)g(condition\),)f(it)j(is)g(possible)f(to)g(de\002ne)g(the)g
+(method)f(and)g(to)i(al)o(w)o(ays)f(raise)h(an)f(e)o(xception)e(when)i
+(the)396 2443 y(method)f(is)i(in)m(v)n(ok)o(ed)e(\(e.g.)g
+Fq(assert)44 b(false)p Fv(\).)396 2592 y(The)20 b(latter)g(is)i(a)e
+(strong)g(recommendation:)c(do)k(not)g(try)g(to)g(further)f(specialize)
+h(the)g(types)g(of)g(e)o(xtension)f(objects.)h(It)g(is)396
+2700 y(dif)n(\002cult,)g(sometimes)g(e)n(v)o(en)f(impossible,)g(and)h
+(almost)g(ne)n(v)o(er)f(w)o(orth-while.)-2 3070 y Fp(3.3.2.)35
+b(Ho)n(w)f(to)f(bind)h(e)n(xtension)h(c)n(lasses)h(to)d(element)i
+(types)396 3237 y Fv(Once)20 b(you)f(ha)n(v)o(e)h(de\002ned)f(your)g(e)
+o(xtension)g(classes,)i(you)e(can)h(bind)g(them)f(to)i(element)e
+(types.)h(The)g(simplest)h(case)f(is)396 3345 y(that)h(you)e(ha)n(v)o
+(e)g(only)h(one)f(class)j(and)d(that)i(this)f(class)h(is)h(to)e(be)g
+(al)o(w)o(ays)h(used.)e(The)h(parsing)f(functions)g(in)h(the)h(module)
+396 3453 y Fq(Pxp_yacc)f Fv(tak)o(e)g(a)h Fq(spec)f Fv(ar)o(gument)d
+(which)j(can)g(be)g(customized.)f(If)h(your)f(single)h(class)h(has)g
+(the)f(name)f Fq(c)p Fv(,)i(this)396 3561 y(ar)o(gument)d(should)h(be)
+396 3741 y Fq(let)45 b(spec)f(=)486 3839 y(make_spec_from_alist)576
+3936 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
+4033 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
+4130 y(~element_alist:)535 b([])576 4227 y(\(\))396 4418
+y Fv(This)21 b(means)f(that)g(data)g(nodes)f(will)i(be)f(created)g
+(from)f(the)h(e)o(x)o(emplar)e(passed)i(by)g(~data_e)o(x)o(emplar)d
+(and)j(that)g(all)396 4526 y(element)g(nodes)f(will)i(be)f(made)g(from)
+f(the)h(e)o(x)o(emplar)e(speci\002ed)i(by)g(~def)o(ault_element_e)o(x)o
+(emplar)-5 b(.)15 b(In)396 4634 y(~element_alist,)k(you)h(can)g(pass)g
+(that)h(dif)n(ferent)d(e)o(x)o(emplars)h(are)h(to)g(be)g(used)g(for)g
+(dif)n(ferent)e(element)i(types;)g(b)n(ut)g(this)396
+4742 y(is)h(an)g(optional)d(feature.)h(If)h(you)g(do)g(not)f(need)h
+(it,)h(pass)f(the)g(empty)g(list.)p Black 3800 5278 a
+Fr(68)p Black eop
+%%Page: 69 69
+69 68 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(Remember)f(that)i(an)f
+(e)o(x)o(emplar)e(is)j(a)g(\(node,)d(e)o(xtension\))h(pair)g(that)i
+(serv)o(es)f(as)h(pattern)e(when)h(ne)n(w)g(nodes)f(\(and)g(the)396
+687 y(corresponding)e(e)o(xtension)i(objects\))g(are)h(added)f(to)i
+(the)f(document)e(tree.)i(In)g(this)h(case,)f(the)g(e)o(x)o(emplar)f
+(contains)g Fq(c)i Fv(as)396 795 y(e)o(xtension,)e(and)g(when)h(nodes)f
+(are)i(created,)e(the)h(e)o(x)o(emplar)e(is)j(cloned,)e(and)h(cloning)f
+(mak)o(es)h(also)g(a)h(cop)o(y)e(of)h Fq(c)h Fv(such)396
+903 y(that)g(all)f(nodes)g(of)g(the)g(document)e(tree)i(will)h(ha)n(v)o
+(e)f(a)g(cop)o(y)g(of)g Fq(c)g Fv(as)h(e)o(xtension.)396
+1052 y(The)f Fq(~element_alist)f Fv(ar)o(gument)e(can)j(bind)g
+(speci\002c)g(element)g(types)g(to)g(speci\002c)g(e)o(x)o(emplars;)f
+(as)i(e)o(x)o(emplars)396 1160 y(may)f(be)g(instances)g(of)g(dif)n
+(ferent)f(classes)i(it)g(is)g(ef)n(fecti)n(v)o(ely)d(possible)i(to)h
+(bind)e(element)h(types)g(to)g(classes.)h(F)o(or)396
+1268 y(e)o(xample,)e(if)h(the)g(element)g(type)g("p")g(is)h
+(implemented)d(by)i(class)h("c_p",)e(and)h("q")g(is)h(realized)f(by)f
+("c_q",)h(you)f(can)396 1376 y(pass)i(the)f(follo)n(wing)f(v)n(alue:)
+396 1556 y Fq(let)45 b(spec)f(=)486 1653 y(make_spec_from_alist)576
+1750 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576
+1847 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576
+1945 y(~element_alist:)665 2042 y([)i("p",)f(new)g(element_impl)f(c_p;)
+755 2139 y("q",)h(new)g(element_impl)f(c_q;)665 2236
+y(])576 2333 y(\(\))396 2524 y Fv(The)20 b(e)o(xtension)f(object)h
+Fq(c)g Fv(is)h(still)h(used)e(for)f(all)i(data)f(nodes)f(and)h(for)g
+(all)g(other)g(element)f(types.)-2 3026 y Fx(3.4.)39
+b(Details)f(of)i(the)f(mapping)e(fr)m(om)i(XML)g(te)n(xt)g(to)g(the)g
+(tree)-2 3212 y(representation)-2 3540 y Fp(3.4.1.)c(The)f
+(representation)h(of)e(c)o(haracter)n(-free)h(elements)396
+3708 y Fv(If)20 b(an)g(element)g(declaration)f(does)h(not)f(allo)n(w)i
+(the)f(element)f(to)i(contain)e(character)g(data,)h(the)g(follo)n(wing)
+e(rules)j(apply)-5 b(.)396 3858 y(If)20 b(the)h(element)e(must)h(be)g
+(empty)-5 b(,)19 b(i.e.)h(it)h(is)g(declared)e(with)i(the)f(k)o(e)o(yw)
+o(ord)e Fq(EMPTY)p Fv(,)i(the)g(element)g(instance)g(must)g(be)396
+3965 y(ef)n(fecti)n(v)o(ely)f(empty)g(\(it)h(must)h(not)f(e)n(v)o(en)f
+(contain)g(whitespace)h(characters\).)e(The)i(parser)g(guarantees)e
+(that)j(a)f(declared)396 4073 y Fq(EMPTY)g Fv(element)g(does)g(ne)n(v)o
+(er)f(contain)g(a)h(data)g(node,)f(e)n(v)o(en)g(if)i(the)f(data)g(node)
+f(represents)h(the)g(empty)f(string.)396 4223 y(If)h(the)h(element)e
+(declaration)g(only)g(permits)h(other)f(elements)h(to)h(occur)e(within)
+h(that)g(element)g(b)n(ut)g(not)g(character)396 4331
+y(data,)g(it)h(is)g(still)g(possible)f(to)h(insert)f(whitespace)g
+(characters)f(between)g(the)h(subelements.)f(The)h(parser)g(ignores)f
+(these)396 4439 y(characters,)g(too,)h(and)g(does)f(not)h(create)g
+(data)g(nodes)g(for)f(them.)396 4588 y Fu(Example.)h
+Fv(Consider)g(the)g(follo)n(wing)f(element)g(types:)396
+4768 y Fq(<!ELEMENT)44 b(x)g(\()h(#PCDATA)f(|)g(z)h(\)*)f(>)396
+4865 y(<!ELEMENT)g(y)g(\()h(z)g(\)*)f(>)p Black 3800
+5278 a Fr(69)p Black eop
+%%Page: 70 70
+70 69 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fq(<!ELEMENT)44
+b(z)g(EMPTY>)396 770 y Fv(Only)20 b Fq(x)h Fv(may)e(contain)h
+(character)e(data,)i(the)h(k)o(e)o(yw)o(ord)d Fq(#PCDATA)h
+Fv(indicates)h(this.)h(The)f(other)f(types)h(are)396
+878 y(character)n(-free.)396 1027 y(The)g(XML)g(term)396
+1207 y Fq(<x><z/>)44 b(<z/></x>)396 1398 y Fv(will)21
+b(be)f(internally)f(represented)g(by)g(an)i(element)e(node)g(for)h
+Fq(x)g Fv(with)h(three)f(subnodes:)e(the)j(\002rst)g
+Fq(z)f Fv(element,)g(a)g(data)396 1506 y(node)f(containing)g(the)h
+(space)g(character)m(,)e(and)i(the)g(second)g Fq(z)g
+Fv(element.)g(In)f(contrast)h(to)g(this,)h(the)f(term)396
+1686 y Fq(<y><z/>)44 b(<z/></y>)396 1877 y Fv(is)21 b(represented)e(by)
+h(an)g(element)f(node)g(for)h Fq(y)h Fv(with)f(only)f
+Fr(two)i Fv(subnodes,)e(the)h(tw)o(o)g Fq(z)h Fv(elements.)e(There)h
+(is)h(no)f(data)396 1985 y(node)f(for)h(the)g(space)g(character)f
+(because)h(spaces)g(are)g(ignored)f(in)h(the)g(character)n(-free)e
+(element)i Fq(y)p Fv(.)-2 2355 y Fp(3.4.2.)35 b(The)f(representation)h
+(of)e(c)o(haracter)h(data)396 2523 y Fv(The)20 b(XML)g(speci\002cation)
+g(allo)n(ws)g(all)h(Unicode)e(characters)g(in)i(XML)f(te)o(xts.)g(This)
+g(parser)g(can)g(be)g(con\002gured)e(such)396 2631 y(that)j(UTF-8)e(is)
+i(used)f(to)h(represent)e(the)h(characters)f(internally;)g(ho)n(we)n(v)
+o(er)m(,)f(the)i(def)o(ault)g(character)e(encoding)h(is)396
+2738 y(ISO-8859-1.)e(\(Currently)-5 b(,)18 b(no)i(other)f(encodings)g
+(are)h(possible)g(for)f(the)i(internal)e(string)h(representation;)e
+(the)i(type)396 2846 y Fq(Pxp_types.rep_encoding)d Fv(enumerates)i(the)
+h(possible)g(encodings.)e(Principially)-5 b(,)19 b(the)h(parser)g
+(could)f(use)h(an)o(y)396 2954 y(encoding)e(that)j(is)g
+(ASCII-compatible,)d(b)n(ut)i(there)g(are)g(currently)e(only)i(le)o
+(xical)f(analyzers)h(for)f(UTF-8)h(and)396 3062 y(ISO-8859-1.)d(It)k
+(is)g(currently)d(impossible)i(to)g(use)h(UTF-16)e(or)h(UCS-4)g(as)h
+(internal)f(encodings)e(\(or)i(other)f(multibyte)396
+3170 y(encodings)g(which)g(are)h(not)g(ASCII-compatible\))e(unless)i
+(major)g(parts)g(of)g(the)g(parser)g(are)g(re)n(written)f(-)i(unlik)o
+(ely)-5 b(...\))396 3320 y(The)20 b(internal)g(encoding)e(may)h(be)h
+(dif)n(ferent)f(from)g(the)h(e)o(xternal)f(encoding)f(\(speci\002ed)i
+(in)g(the)g(XML)h(declaration)396 3428 y Fo(<)p Fq(?xml)44
+b(...)g(encoding="..."?)p Fo(>)p Fv(\);)18 b(in)j(this)f(case)h(the)f
+(strings)g(are)g(automatically)f(con)m(v)o(erted)f(to)i(the)g(internal)
+396 3535 y(encoding.)396 3685 y(If)g(the)h(internal)e(encoding)f(is)j
+(ISO-8859-1,)c(it)k(is)g(possible)f(that)g(there)g(are)g(characters)g
+(that)g(cannot)f(be)h(represented.)396 3793 y(In)g(this)h(case,)f(the)g
+(parser)g(ignores)f(such)h(characters)f(and)h(prints)g(a)h(w)o(arning)e
+(\(to)h(the)g Fq(collect_warning)e Fv(object)396 3901
+y(that)j(must)f(be)g(passed)g(when)g(the)g(parser)f(is)i(called\).)396
+4050 y(The)f(XML)g(speci\002cation)g(allo)n(ws)g(lines)h(to)f(be)g
+(separated)g(by)f(single)h(LF)h(characters,)e(by)h(CR)h(LF)g(character)
+396 4158 y(sequences,)e(or)h(by)g(single)g(CR)i(characters.)d
+(Internally)-5 b(,)18 b(these)i(separators)f(are)h(al)o(w)o(ays)h(con)m
+(v)o(erted)d(to)i(single)g(LF)396 4266 y(characters.)396
+4416 y(The)g(parser)g(guarantees)e(that)j(there)e(are)i(ne)n(v)o(er)d
+(tw)o(o)j(adjacent)e(data)h(nodes;)g(if)g(necessary)-5
+b(,)19 b(data)h(material)g(that)g(w)o(ould)396 4523 y(otherwise)g(be)g
+(represented)e(by)i(se)n(v)o(eral)g(nodes)f(is)i(collapsed)f(into)f
+(one)h(node.)f(Note)h(that)g(you)g(can)g(still)h(create)f(node)396
+4631 y(trees)h(with)f(adjacent)g(data)g(nodes;)f(ho)n(we)n(v)o(er)m(,)f
+(the)i(parser)g(does)f(not)h(return)f(such)h(trees.)p
+Black 3800 5278 a Fr(70)p Black eop
+%%Page: 71 71
+71 70 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black 396 579 a Fv(Note)g(that)h(CD)m(A)
+-9 b(T)h(A)20 b(sections)g(are)g(not)g(represented)f(specially;)h(such)
+g(sections)g(are)g(added)f(to)h(the)h(current)d(data)396
+687 y(material)i(that)g(being)g(collected)f(for)h(the)g(ne)o(xt)f(data)
+h(node.)-2 1056 y Fp(3.4.3.)35 b(The)f(representation)h(of)e(entities)h
+(within)g(documents)396 1224 y Fr(Entities)21 b(ar)m(e)f(not)g(r)m(epr)
+m(esented)f(within)i(documents!)d Fv(If)i(the)h(parser)e(\002nds)h(an)h
+(entity)e(reference)g(in)h(the)g(document)396 1332 y(content,)f(the)h
+(reference)f(is)i(immediately)e(e)o(xpanded,)e(and)j(the)g(parser)g
+(reads)g(the)g(e)o(xpansion)e(te)o(xt)i(instead)g(of)g(the)396
+1440 y(reference.)-2 1810 y Fp(3.4.4.)35 b(The)f(representation)h(of)e
+(attrib)n(utes)396 1977 y Fv(As)21 b(attrib)n(ute)f(v)n(alues)g(are)g
+(composed)e(of)i(Unicode)f(characters,)g(too,)h(the)g(same)h(problems)d
+(with)j(the)f(character)396 2085 y(encoding)e(arise)j(as)g(for)e
+(character)g(material.)h(Attrib)n(ute)g(v)n(alues)g(are)g(con)m(v)o
+(erted)d(to)k(the)f(internal)f(encoding,)f(too;)i(and)396
+2193 y(if)h(there)e(are)i(characters)e(that)h(cannot)f(be)h
+(represented,)e(these)j(are)f(dropped,)e(and)h(a)i(w)o(arning)e(is)i
+(printed.)396 2343 y(Attrib)n(ute)f(v)n(alues)g(are)g(normalized)e
+(before)h(the)o(y)h(are)g(returned)e(by)i(methods)f(lik)o(e)h
+Fq(attribute)p Fv(.)f(First,)i(an)o(y)396 2451 y(remaining)e(entity)h
+(references)e(are)i(e)o(xpanded;)e(if)j(necessary)-5
+b(,)19 b(e)o(xpansion)f(is)j(performed)c(recursi)n(v)o(ely)-5
+b(.)18 b(Second,)396 2558 y(ne)n(wline)i(characters)f(\(an)o(y)g(of)h
+(LF)-7 b(,)21 b(CR)g(LF)-7 b(,)21 b(or)f(CR)h(characters\))e(are)h(con)
+m(v)o(erted)e(to)i(single)g(space)h(characters.)e(Note)396
+2666 y(that)i(especially)e(the)i(latter)f(action)g(is)h(prescribed)d
+(by)i(the)g(XML)g(standard)f(\(b)n(ut)41 b(is)21 b(not)f(con)m(v)o
+(erted)e(such)i(that)g(it)h(is)396 2774 y(still)h(possible)e(to)g
+(include)f(line)h(feeds)g(into)g(attrib)n(utes\).)-2
+3144 y Fp(3.4.5.)35 b(The)f(representation)h(of)e(pr)n(ocessing)h
+(instructions)396 3312 y Fv(Processing)20 b(instructions)f(are)h
+(parsed)g(to)g(some)g(e)o(xtent:)f(The)h(\002rst)h(w)o(ord)f(of)g(the)g
+(PI)g(is)i(called)e(the)g(tar)o(get,)f(and)g(it)i(is)396
+3420 y(stored)f(separated)f(from)g(the)i(rest)f(of)g(the)g(PI:)396
+3600 y Fq(<?target)44 b(rest?>)396 3791 y Fv(The)20 b(e)o(xact)g
+(location)f(where)h(a)g(PI)h(occurs)e(is)i(not)f(represented)f(\(by)g
+(def)o(ault\).)g(The)h(parser)f(puts)i(the)f(PI)g(into)g(the)396
+3899 y(object)g(that)g(represents)g(the)g(embracing)e(construct)h(\(an)
+h(element,)f(a)i(DTD,)f(or)g(the)g(whole)g(document\);)e(that)i(means)
+396 4007 y(you)g(can)g(\002nd)f(out)h(which)g(PIs)h(occur)e(in)h(a)h
+(certain)f(element,)f(in)h(the)h(DTD,)f(or)g(in)g(the)g(whole)g
+(document,)e(b)n(ut)i(you)396 4114 y(cannot)f(lookup)g(the)h(e)o(xact)g
+(position)f(within)h(the)g(construct.)396 4264 y(If)g(you)g(require)e
+(the)j(e)o(xact)e(location)h(of)g(PIs,)g(it)h(is)g(possible)f(to)g
+(create)g(e)o(xtra)g(nodes)f(for)h(them.)f(This)i(mode)e(is)396
+4372 y(controled)g(by)g(the)i(option)e Fq(enable_pinstr_nodes)p
+Fv(.)e(The)j(additional)f(nodes)g(ha)n(v)o(e)h(the)g(node)f(type)h
+Fq(T_pinstr)396 4480 y Fn(target)p Fv(,)g(and)f(are)i(created)e(from)g
+(special)h(e)o(x)o(emplars)f(contained)f(in)j(the)f Fq(spec)g
+Fv(\(see)g(pxp_document.mli\).)p Black 3800 5278 a Fr(71)p
+Black eop
+%%Page: 72 72
+72 71 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr)
+m(esenting)g(the)g(document)p Black -2 583 a Fp(3.4.6.)35
+b(The)f(representation)h(of)e(comments)396 751 y Fv(Normally)-5
+b(,)19 b(comments)g(are)h(not)g(represented;)e(the)o(y)i(are)g(dropped)
+e(by)h(def)o(ault.)h(Ho)n(we)n(v)o(er)m(,)e(if)i(you)f(require)g(them,)
+h(it)h(is)396 859 y(possible)f(to)h(create)e Fq(T_comment)h
+Fv(nodes)f(for)h(them.)f(This)i(mode)e(can)h(be)g(speci\002ed)g(by)g
+(the)g(option)396 967 y Fq(enable_comment_nodes)p Fv(.)d(Comment)j
+(nodes)f(are)h(created)g(from)f(special)h(e)o(x)o(emplars)f(contained)f
+(in)j(the)f Fq(spec)396 1075 y Fv(\(see)h(pxp_document.mli\).)15
+b(Y)-9 b(ou)19 b(can)h(access)h(the)f(contents)g(of)g(comments)f
+(through)f(the)i(method)f Fq(comment)p Fv(.)-2 1444 y
+Fp(3.4.7.)35 b(The)f(attrib)n(utes)f Fc(xml:lang)d Fp(and)k
+Fc(xml:space)396 1612 y Fv(These)20 b(attrib)n(utes)g(are)g(not)g
+(supported)f(specially;)h(the)o(y)f(are)h(handled)f(lik)o(e)h(an)o(y)g
+(other)f(attrib)n(ute.)-2 1982 y Fp(3.4.8.)35 b(And)f(what)f(about)h
+(namespaces?)396 2149 y Fv(Currently)-5 b(,)19 b(there)g(is)i(no)f
+(special)h(support)d(for)i(namespaces.)f(Ho)n(we)n(v)o(er)m(,)f(the)i
+(parser)g(allo)n(ws)g(it)h(that)f(the)h(colon)e(occurs)396
+2257 y(in)i(names)e(such)h(that)h(it)g(is)g(possible)f(to)g(implement)f
+(namespaces)g(on)h(top)g(of)g(the)g(current)f(API.)396
+2407 y(Some)h(future)f(release)h(of)g(PXP)h(will)g(support)e
+(namespaces)g(as)i(b)n(uilt-in)f(feature...)p Black 3800
+5278 a Fr(72)p Black eop
+%%Page: 73 73
+73 72 bop Black Black -2 621 a Fs(Chapter)48 b(4.)f(Con\002guring)j
+(and)e(calling)f(the)h(par)m(ser)-2 1055 y Fx(4.1.)39
+b(Over)q(vie)n(w)396 1235 y Fv(There)20 b(are)g(the)g(follo)n(wing)f
+(main)g(functions)g(in)m(v)n(oking)f(the)i(parser)g(\(in)g(Pxp_yacc\):)
+p Black 396 1558 a Ft(\225)p Black 60 w Fr(par)o(se_document_entity:)d
+Fv(Y)-9 b(ou)19 b(w)o(ant)i(to)f(parse)g(a)g(complete)g(and)f(closed)h
+(document)e(consisting)i(of)g(a)g(DTD)h(and)479 1666
+y(the)f(document)f(body;)g(the)h(body)f(is)i(v)n(alidated)e(against)g
+(the)h(DTD.)h(This)f(mode)f(is)i(interesting)f(if)g(you)f(ha)n(v)o(e)h
+(a)h(\002le)479 1835 y Fq(<!DOCTYPE)44 b(root)g(...)g([)h(...)f(])h(>)f
+(<root>)g(...)h(</root>)396 1984 y Fv(and)20 b(you)f(can)h(accept)g(an)
+o(y)f(DTD)i(that)f(is)h(included)e(in)h(the)g(\002le)h(\(e.g.)f
+(because)f(the)h(\002le)h(is)g(under)e(your)g(control\).)p
+Black 396 2092 a Ft(\225)p Black 60 w Fr(par)o(se_wfdocument_entity:)e
+Fv(Y)-9 b(ou)20 b(w)o(ant)g(to)g(parse)g(a)h(complete)e(and)h(closed)f
+(document)g(consisting)g(of)h(a)h(DTD)479 2200 y(and)f(the)g(document)e
+(body;)h(b)n(ut)h(the)h(body)d(is)k(not)d(v)n(alidated,)g(only)h(check)
+o(ed)e(for)i(well-formedness.)e(This)i(mode)f(is)479
+2308 y(preferred)f(if)j(v)n(alidation)d(costs)j(too)f(much)f(time)i(or)
+f(if)g(the)g(DTD)h(is)g(missing.)p Black 396 2416 a Ft(\225)p
+Black 60 w Fr(par)o(se_dtd_entity:)d Fv(Y)-9 b(ou)20
+b(w)o(ant)g(only)f(to)i(parse)e(an)i(entity)e(\(\002le\))i(containing)d
+(the)i(e)o(xternal)f(subset)h(of)g(a)h(DTD.)479 2524
+y(Sometimes)f(it)h(is)g(interesting)e(to)i(read)e(such)h(a)h(DTD,)f
+(for)g(e)o(xample)e(to)j(compare)d(it)j(with)g(the)f(DTD)g(included)f
+(in)h(a)479 2632 y(document,)e(or)i(to)g(apply)g(the)g(ne)o(xt)f(mode:)
+p Black 396 2740 a Ft(\225)p Black 60 w Fr(par)o(se_content_entity:)e
+Fv(Y)-9 b(ou)20 b(w)o(ant)g(only)g(to)g(parse)g(an)g(entity)g
+(\(\002le\))g(containing)e(a)j(fragment)d(of)i(a)h(document)479
+2848 y(body;)e(this)i(fragment)d(is)j(v)n(alidated)f(against)f(the)h
+(DTD)h(you)e(pass)i(to)f(the)g(function.)e(Especially)-5
+b(,)19 b(the)i(fragment)479 2956 y(must)g(not)e(ha)n(v)o(e)h(a)65
+b Fo(<)p Fq(!DOCTYPE)p Fo(>)19 b Fv(clause,)h(and)g(must)g(directly)g
+(be)o(gin)f(with)h(an)g(element.)f(The)h(element)g(is)479
+3064 y(v)n(alidated)f(against)h(the)g(DTD.)g(This)h(mode)e(is)i
+(interesting)e(if)i(you)e(w)o(ant)h(to)h(check)e(documents)f(against)i
+(a)h(\002x)o(ed,)479 3172 y(immutable)e(DTD.)p Black
+396 3280 a Ft(\225)p Black 60 w Fr(par)o(se_wfcontent_entity:)f
+Fv(This)i(function)f(also)h(parses)g(a)h(single)f(element)g(without)f
+(DTD,)h(b)n(ut)g(does)g(not)g(v)n(alidate)479 3388 y(it.)p
+Black 396 3495 a Ft(\225)p Black 60 w Fr(e)n(xtr)o(act_dtd_fr)l
+(om_document_entity:)15 b Fv(This)20 b(function)f(e)o(xtracts)g(the)i
+(DTD)f(from)f(a)i(closed)f(document)479 3603 y(consisting)g(of)g(a)g
+(DTD)h(and)e(a)i(document)d(body)-5 b(.)18 b(Both)j(the)f(internal)f
+(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted.)396
+3794 y(In)f(man)o(y)f(cases,)i Fq(parse_document_entity)c
+Fv(is)k(the)f(preferred)e(mode)i(to)g(parse)g(a)g(document)f(in)h(a)h
+(v)n(alidating)396 3902 y(w)o(ay)-5 b(,)20 b(and)g Fq
+(parse_wfdocument_entity)c Fv(is)22 b(the)e(mode)f(of)h(choice)f(to)i
+(parse)f(a)g(\002le)h(while)f(only)g(checking)e(for)396
+4010 y(well-formedness.)396 4160 y(There)i(are)g(a)g(number)f(of)h(v)n
+(ariations)f(of)h(these)g(modes.)f(One)h(important)f(application)g(of)h
+(a)g(parser)g(is)h(to)f(check)396 4268 y(documents)f(of)h(an)g
+(untrusted)f(source)g(against)h(a)g(\002x)o(ed)g(DTD.)g(One)g(solution)
+f(is)i(to)g(not)f(allo)n(w)g(the)g Fo(<)p Fq(!DOCTYPE)p
+Fo(>)396 4375 y Fv(clause)g(in)h(these)f(documents,)e(and)i(treat)g
+(the)h(document)d(lik)o(e)i(a)h(fragment)d(\(using)i(mode)f
+Fr(par)o(se_content_entity)p Fv(\).)396 4483 y(This)i(is)g(v)o(ery)e
+(simple,)h(b)n(ut)g(in\003e)o(xible;)f(users)i(of)e(such)h(a)h(system)f
+(cannot)f(e)n(v)o(en)h(de\002ne)f(additional)g(entities)i(to)396
+4591 y(abbre)n(viate)e(frequent)f(phrases)i(of)g(their)g(te)o(xt.)396
+4741 y(It)h(may)e(be)i(necessary)e(to)h(ha)n(v)o(e)g(a)h(more)e
+(intelligent)g(check)o(er)-5 b(.)20 b(F)o(or)g(e)o(xample,)e(it)j(is)g
+(also)g(possible)e(to)i(parse)f(the)396 4849 y(document)e(to)j(check)e
+(fully)-5 b(,)19 b(i.e.)h(with)h(DTD,)f(and)f(to)i(compare)d(this)j
+(DTD)f(with)h(the)f(prescribed)f(one.)g(In)h(order)f(to)p
+Black 3800 5278 a Fr(73)p Black eop
+%%Page: 74 74
+74 73 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(fully)g(parse)g(the)g
+(document,)e(mode)h Fr(par)o(se_document_entity)e Fv(is)k(applied,)e
+(and)h(to)g(get)g(the)g(DTD)h(to)f(compare)f(with)396
+687 y(mode)g Fr(par)o(se_dtd_entity)f Fv(can)i(be)h(used.)396
+836 y(There)f(is)h(another)d(v)o(ery)i(important)e(con\002gurable)g
+(aspect)i(of)g(the)g(parser:)g(the)g(so-called)g(resolv)o(er)-5
+b(.)19 b(The)h(task)g(of)g(the)396 944 y(resolv)o(er)f(is)i(to)g
+(locate)f(the)g(contents)f(of)h(an)g(\(e)o(xternal\))f(entity)g(for)h
+(a)h(gi)n(v)o(en)e(entity)g(name,)h(and)f(to)i(mak)o(e)e(the)i
+(contents)396 1052 y(accessible)g(as)f(a)h(character)e(stream.)h
+(\(Furthermore,)d(it)k(also)f(normalizes)g(the)g(character)f(set;)i(b)n
+(ut)f(this)h(is)g(a)f(detail)h(we)396 1160 y(can)f(ignore)f(here.\))g
+(Consider)h(you)f(ha)n(v)o(e)h(a)g(\002le)h(called)f
+Fq("main.xml")f Fv(containing)396 1340 y Fq(<!ENTITY)44
+b(\045)g(sub)h(SYSTEM)f("sub/sub.xml">)396 1437 y(\045sub;)396
+1628 y Fv(and)20 b(a)h(\002le)f(stored)g(in)g(the)h(subdirectory)c
+Fq("sub")j Fv(with)h(name)e Fq("sub.xml")g Fv(containing)396
+1808 y Fq(<!ENTITY)44 b(\045)g(subsub)g(SYSTEM)g("subsub/subsub.xml">)
+396 1906 y(\045subsub;)396 2097 y Fv(and)20 b(a)g(\002le)h(stored)e(in)
+h(the)g(subdirectory)d Fq("subsub")j Fv(of)f Fq("sub")h
+Fv(with)g(name)f Fq("subsub.xml")g Fv(\(the)g(contents)h(of)f(this)396
+2204 y(\002le)i(do)f(not)g(matter\).)f(Here,)h(the)g(resolv)o(er)f
+(must)h(track)g(that)g(the)g(second)g(entity)g Fq(subsub)f
+Fv(is)i(located)f(in)g(the)h(directory)396 2312 y Fq("sub/subsub")p
+Fv(,)e(i.e.)h(the)g(dif)n(\002culty)f(is)i(to)g(interpret)e(the)h
+(system)g(\(\002le\))h(names)e(of)h(entities)h(relati)n(v)o(e)e(to)i
+(the)f(entities)396 2420 y(containing)f(them,)g(e)n(v)o(en)g(if)i(the)f
+(entities)h(are)f(deeply)f(nested.)396 2570 y(There)h(is)h(not)f(a)g
+(\002x)o(ed)g(resolv)o(er)f(already)g(doing)g(e)n(v)o(erything)e(right)
+j(-)g(resolving)f(entity)h(names)g(is)h(a)f(task)h(that)f(highly)396
+2678 y(depends)f(on)h(the)g(en)m(vironment.)d(The)j(XML)g
+(speci\002cation)f(only)h(demands)f(that)h Fq(SYSTEM)g
+Fv(entities)g(are)g(interpreted)396 2786 y(lik)o(e)h(URLs)g(\(which)e
+(is)i(not)f(v)o(ery)f(precise,)h(as)h(there)e(are)i(lots)f(of)g(URL)h
+(schemes)f(in)g(use\),)g(hoping)f(that)h(this)h(helps)396
+2894 y(o)o(v)o(ercoming)c(the)j(local)g(peculiarities)g(of)g(the)g(en)m
+(vironment;)d(the)k(idea)f(is)h(that)f(if)h(you)e(do)h(not)f(kno)n(w)h
+(your)396 3001 y(en)m(vironment)d(you)j(can)g(refer)f(to)h(other)g
+(entities)g(by)g(denoting)e(URLs)k(for)d(them.)h(I)g(think)g(that)g
+(this)h(interpretation)d(of)396 3109 y Fq(SYSTEM)i Fv(names)g(may)g(ha)
+n(v)o(e)f(some)h(applications)f(in)i(the)f(internet,)f(b)n(ut)h(it)h
+(is)g(not)f(the)g(\002rst)h(choice)f(in)g(general.)396
+3217 y(Because)h(of)f(this,)g(the)g(resolv)o(er)f(is)i(a)g(separate)f
+(module)e(of)i(the)h(parser)e(that)h(can)g(be)h(e)o(xchanged)c(by)j
+(another)f(one)g(if)396 3325 y(necessary;)h(more)f(precisely)-5
+b(,)19 b(the)h(parser)g(already)f(de\002nes)h(se)n(v)o(eral)f(resolv)o
+(ers.)396 3475 y(The)h(follo)n(wing)f(resolv)o(ers)g(do)h(already)f(e)o
+(xist:)p Black 396 3707 a Ft(\225)p Black 60 w Fv(Resolv)o(ers)h
+(reading)f(from)g(arbitrary)g(input)g(channels.)g(These)h(can)g(be)g
+(con\002gured)e(such)i(that)g(a)h(certain)f(ID)g(is)479
+3815 y(associated)g(with)h(the)f(channel;)f(in)h(this)h(case)g(inner)e
+(references)g(to)h(e)o(xternal)f(entities)i(can)f(be)g(resolv)o(ed.)e
+(There)i(is)479 3923 y(also)h(a)f(special)h(resolv)o(er)e(that)h
+(interprets)f(SYSTEM)i(IDs)f(as)h(URLs;)g(this)g(resolv)o(er)e(can)h
+(process)g(relati)n(v)o(e)479 4031 y(SYSTEM)h(names)e(and)h(determine)f
+(the)h(corresponding)d(absolute)i(URL.)p Black 396 4139
+a Ft(\225)p Black 60 w Fv(A)i(resolv)o(er)e(that)h(reads)g(al)o(w)o
+(ays)h(from)e(a)i(gi)n(v)o(en)d(O'Caml)j(string.)e(This)i(resolv)o(er)e
+(is)i(not)f(able)g(to)g(resolv)o(e)f(further)479 4247
+y(names)h(unless)g(the)h(string)f(is)h(not)f(associated)g(with)g(an)o
+(y)f(name,)h(i.e.)g(if)g(the)g(document)f(contained)f(in)j(the)f
+(string)479 4355 y(refers)g(to)g(an)g(e)o(xternal)f(entity)-5
+b(,)20 b(this)g(reference)f(cannot)g(be)h(follo)n(wed)f(in)h(this)h
+(case.)p Black 396 4463 a Ft(\225)p Black 60 w Fv(A)g(resolv)o(er)e
+(for)g(\002le)i(names.)f(The)g Fq(SYSTEM)g Fv(name)f(is)i(interpreted)e
+(as)i(\002le)f(URL)h(with)g(the)f(slash)h("/")f(as)h(separator)479
+4571 y(for)f(directories.)f(-)h(This)h(resolv)o(er)d(is)k(deri)n(v)o
+(ed)c(from)h(the)h(generic)f(URL)i(resolv)o(er)-5 b(.)396
+4720 y(The)20 b(interf)o(ace)f(a)i(resolv)o(er)e(must)h(ha)n(v)o(e)g
+(is)h(documented,)c(so)k(it)g(is)g(possible)f(to)g(write)g(your)f(o)n
+(wn)h(resolv)o(er)-5 b(.)19 b(F)o(or)396 4828 y(e)o(xample,)g(you)g
+(could)g(connect)g(the)h(parser)g(with)g(an)h(HTTP)f(client,)g(and)f
+(resolv)o(e)h(URLs)h(of)f(the)g(HTTP)g(namespace.)p Black
+3800 5278 a Fr(74)p Black eop
+%%Page: 75 75
+75 74 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(resolv)o(er)f
+(classes)i(support)e(that)h(se)n(v)o(eral)g(independent)e(resolv)o(ers)
+h(are)h(combined)e(to)i(one)g(more)f(po)n(werful)396
+687 y(resolv)o(er;)g(thus)h(it)h(is)g(possible)f(to)h(combine)d(a)j
+(self-written)e(resolv)o(er)g(with)i(the)f(already)f(e)o(xisting)g
+(resolv)o(ers.)396 836 y(Note)h(that)h(the)f(e)o(xisting)f(resolv)o
+(ers)h(only)f(interpret)g Fq(SYSTEM)h Fv(names,)f(not)h
+Fq(PUBLIC)g Fv(names.)g(If)g(it)h(helps)f(you,)f(it)h(is)396
+944 y(possible)g(to)f(de\002ne)h(resolv)o(ers)e(for)h
+Fq(PUBLIC)h Fv(names,)f(too;)g(for)g(e)o(xample,)f(such)i(a)g(resolv)o
+(er)e(could)h(look)g(up)g(the)h(public)396 1052 y(name)g(in)g(a)h(hash)
+f(table,)g(and)f(map)h(it)h(to)f(a)h(system)f(name)g(which)g(is)h
+(passed)f(o)o(v)o(er)f(to)h(the)g(e)o(xisting)g(resolv)o(er)e(for)396
+1160 y(system)j(names.)e(It)i(is)g(relati)n(v)o(ely)e(simple)h(to)g
+(pro)o(vide)f(such)g(a)i(resolv)o(er)-5 b(.)-2 1579 y
+Fx(4.2.)39 b(Resolver)n(s)e(and)i(sour)m(ces)-2 1907
+y Fp(4.2.1.)c(Using)f(the)g(b)n(uilt-in)f(resolver)n(s)i(\(called)g
+(sour)n(ces\))396 2075 y Fv(The)20 b(type)g Fq(source)g
+Fv(enumerates)e(the)j(tw)o(o)f(possibilities)h(where)e(the)h(document)f
+(to)h(parse)g(comes)g(from.)396 2255 y Fq(type)44 b(source)g(=)576
+2352 y(Entity)f(of)i(\(\(dtd)f(-)p Fo(>)g Fq(Pxp_entity.entity\))e(*)j
+(Pxp_reader.resolver\))486 2449 y(|)g(ExtID)f(of)g(\(ext_id)g(*)g
+(Pxp_reader.resolver\))396 2640 y Fv(Y)-9 b(ou)20 b(normally)e(need)i
+(not)g(to)g(w)o(orry)f(about)h(this)g(type)g(as)h(there)f(are)g(con)m
+(v)o(enience)d(functions)i(that)h(create)g Fq(source)396
+2748 y Fv(v)n(alues:)p Black 396 3105 a Ft(\225)p Black
+60 w Fq(from_file)44 b(s)p Fv(:)20 b(The)g(document)e(is)j(read)f(from)
+f(\002le)i Fq(s)p Fv(;)g(you)e(may)h(specify)f(absolute)h(or)g(relati)n
+(v)o(e)f(path)h(names.)479 3213 y(The)g(\002le)h(name)f(must)g(be)g
+(encoded)e(as)j(UTF-8)f(string.)479 3362 y(There)g(is)h(an)f(optional)f
+(ar)o(gument)f Fq(~system_encoding)g Fv(specifying)g(the)j(character)d
+(encoding)h(which)g(is)i(used)479 3470 y(for)f(the)g(names)g(of)g(the)g
+(\002le)h(system.)f(F)o(or)g(e)o(xample,)e(if)j(this)g(encoding)d(is)j
+(ISO-8859-1)c(and)j Fq(s)g Fv(is)i(also)e(a)479 3578
+y(ISO-8859-1)e(string,)h(you)h(can)g(form)f(the)h(source:)479
+3717 y Fq(let)45 b(s_utf8)88 b(=)i(recode_string)42 b
+(~in_enc:`Enc_iso88591)g(~out_enc:`Enc_utf8)g(s)i(in)479
+3814 y(from_file)g(~system_encoding:`Enc_iso88591)39
+b(s_utf8)479 4005 y Fv(This)21 b Fq(source)e Fv(has)i(the)f(adv)n
+(antage)e(that)j(it)f(is)i(able)e(to)g(resolv)o(e)f(inner)h(e)o
+(xternal)f(entities;)h(i.e.)g(if)h(your)e(document)479
+4113 y(includes)g(data)g(from)g(another)f(\002le)i(\(using)f(the)g
+Fq(SYSTEM)g Fv(attrib)n(ute\),)g(this)g(mode)g(will)h(\002nd)f(that)h
+(\002le.)g(Ho)n(we)n(v)o(er)m(,)d(this)479 4221 y(mode)j(cannot)f
+(resolv)o(e)g Fq(PUBLIC)h Fv(identi\002ers)f(nor)h Fq(SYSTEM)g
+Fv(identi\002ers)f(other)h(than)g("\002le:".)p Black
+396 4370 a Ft(\225)p Black 60 w Fq(from_channel)43 b(ch)p
+Fv(:)21 b(The)e(document)g(is)i(read)e(from)h(the)g(channel)f
+Fq(ch)p Fv(.)h(In)g(general,)f(this)h(source)g(also)g(supports)479
+4478 y(\002le)h(URLs)g(found)e(in)h(the)g(document;)f(ho)n(we)n(v)o(er)
+m(,)e(by)j(def)o(ault)f(only)h(absolute)f(URLs)i(are)f(understood.)e
+(It)i(is)479 4586 y(possible)g(to)h(associate)f(an)g(ID)g(with)h(the)f
+(channel)f(such)h(that)g(the)g(resolv)o(er)f(kno)n(ws)h(ho)n(w)f(to)i
+(interpret)e(relati)n(v)o(e)479 4694 y(URLs:)479 4832
+y Fq(from_channel)43 b(~id:\(System)g("file:///dir/dir1/"\))f(ch)p
+Black 3800 5278 a Fr(75)p Black eop
+%%Page: 76 76
+76 75 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(There)g(is)h(also)f
+(the)g(~system_encoding)e(ar)o(gument)f(specifying)i(ho)n(w)h(\002le)h
+(names)e(are)i(encoded.)d(-)i(The)g(e)o(xample)479 687
+y(from)f(abo)o(v)o(e)g(can)h(also)g(be)h(written)f(\(b)n(ut)f(it)i(is)g
+(no)f(longer)f(possible)h(to)g(interpret)f(relati)n(v)o(e)h(URLs)h
+(because)e(there)h(is)479 795 y(no)g(~id)g(ar)o(gument,)e(and)i
+(computing)d(this)k(ar)o(gument)d(is)j(relati)n(v)o(ely)e(complicated)g
+(because)g(it)i(must)f(be)h(a)f(v)n(alid)479 903 y(URL\):)479
+1041 y Fq(let)45 b(ch)f(=)h(open_in)e(s)i(in)479 1138
+y(let)g(src)f(=)h(from_channel)d(~system_encoding:`Enc_iso88591)e(ch)45
+b(in)479 1236 y(...;)479 1333 y(close_in)f(ch)p Black
+396 1482 a Ft(\225)p Black 60 w Fq(from_string)f(s)p
+Fv(:)21 b(The)f(string)g Fq(s)g Fv(is)h(the)g(document)d(to)i(parse.)g
+(This)g(mode)f(is)j(not)d(able)h(to)h(interpret)e(\002le)i(names)479
+1590 y(of)f Fq(SYSTEM)g Fv(clauses,)g(nor)g(it)h(can)f(look)f(up)h
+Fq(PUBLIC)f Fv(identi\002ers.)479 1740 y(Normally)-5
+b(,)19 b(the)h(encoding)e(of)i(the)g(string)g(is)h(detected)e(as)i
+(usual)f(by)g(analyzing)f(the)h(XML)g(declaration,)e(if)j(an)o(y)-5
+b(.)479 1847 y(Ho)n(we)n(v)o(er)m(,)18 b(it)j(is)g(also)g(possible)f
+(to)g(specify)g(the)g(encoding)e(directly:)479 1986 y
+Fq(let)45 b(src)f(=)h(from_string)e(~fixenc:`ISO-8859-2)e(s)p
+Black 396 2177 a Ft(\225)p Black 60 w Fq(ExtID)j(\(id,)g(r\))p
+Fv(:)21 b(The)f(document)e(to)i(parse)g(is)h(denoted)e(by)h(the)g
+(identi\002er)g Fq(id)g Fv(\(either)f(a)i Fq(SYSTEM)f
+Fv(or)g Fq(PUBLIC)479 2285 y Fv(clause\),)g(and)g(this)g(identi\002er)g
+(is)h(interpreted)d(by)i(the)g(resolv)o(er)f Fq(r)p Fv(.)i(Use)f(this)h
+(mode)e(if)i(you)e(ha)n(v)o(e)h(written)g(your)f(o)n(wn)479
+2393 y(resolv)o(er)-5 b(.)479 2542 y(Which)20 b(character)f(sets)j(are)
+e(possible)g(depends)e(on)i(the)g(passed)h(resolv)o(er)d
+Fq(r)p Fv(.)p Black 396 2692 a Ft(\225)p Black 60 w Fq(Entity)44
+b(\(get_entity,)f(r\))p Fv(:)20 b(The)g(document)e(to)j(parse)f(is)h
+(returned)d(by)i(the)g(function)f(in)m(v)n(ocation)479
+2800 y Fq(get_entity)43 b(dtd)p Fv(,)20 b(where)g Fq(dtd)g
+Fv(is)h(the)g(DTD)f(object)g(to)g(use)g(\(it)h(may)f(be)g(empty\).)f
+(Inner)f(e)o(xternal)h(references)479 2908 y(occuring)g(in)h(this)h
+(entity)e(are)i(resolv)o(ed)d(using)i(the)g(resolv)o(er)f
+Fq(r)p Fv(.)479 3057 y(Which)h(character)f(sets)j(are)e(possible)g
+(depends)e(on)i(the)g(passed)h(resolv)o(er)d Fq(r)p Fv(.)-2
+3510 y Fp(4.2.2.)35 b(The)f(resolver)g(API)396 3677 y
+Fv(A)21 b(resolv)o(er)e(is)i(an)f(object)g(that)g(can)g(be)g(opened)e
+(lik)o(e)j(a)f(\002le,)h(b)n(ut)f(you)f(do)h(not)g(pass)g(the)h(\002le)
+f(name)g(to)g(the)g(resolv)o(er)m(,)f(b)n(ut)396 3785
+y(the)h(XML)h(identi\002er)e(of)h(the)g(entity)g(to)h(read)e(from)g
+(\(either)h(a)g Fq(SYSTEM)g Fv(or)g Fq(PUBLIC)g Fv(clause\).)f(When)h
+(opened,)f(the)396 3893 y(resolv)o(er)g(must)h(return)f(the)i
+Fq(Lexing.lexbuf)d Fv(that)i(reads)g(the)h(characters.)e(The)g(resolv)o
+(er)g(can)h(be)h(closed,)e(and)h(it)396 4001 y(can)g(be)g(cloned.)f
+(Furthermore,)f(it)j(is)g(possible)f(to)g(tell)h(the)f(resolv)o(er)f
+(which)h(character)f(set)i(it)g(should)e(assume.)h(-)g(The)396
+4109 y(follo)n(wing)f(from)g(Pxp_reader:)396 4289 y Fq(exception)44
+b(Not_competent)396 4386 y(exception)g(Not_resolvable)e(of)j(exn)396
+4581 y(class)f(type)g(resolver)g(=)486 4678 y(object)576
+4775 y(method)f(init_rep_encoding)f(:)j(rep_encoding)e(->)h(unit)576
+4872 y(method)f(init_warner)g(:)i(collect_warnings)d(->)j(unit)p
+Black 3798 5278 a Fr(76)p Black eop
+%%Page: 77 77
+77 76 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(rep_encoding)g(:)i(rep_encoding)576 676 y(method)e(open_in)h(:)h
+(ext_id)f(->)g(Lexing.lexbuf)576 773 y(method)f(close_in)h(:)h(unit)576
+870 y(method)e(change_encoding)g(:)h(string)g(->)h(unit)576
+967 y(method)e(clone)h(:)h(resolver)576 1065 y(method)e(close_all)h(:)g
+(unit)486 1162 y(end)396 1353 y Fv(The)20 b(resolv)o(er)f(object)h
+(must)g(w)o(ork)f(as)i(follo)n(ws:)p Black 396 1627 a
+Ft(\225)p Black 60 w Fv(When)f(the)h(parser)e(is)i(called,)f(it)h
+(tells)g(the)f(resolv)o(er)f(the)h(w)o(arner)g(object)f(and)h(the)g
+(internal)g(encoding)e(by)i(in)m(v)n(oking)479 1735 y
+Fq(init_warner)f Fv(and)h Fq(init_rep_encoding)p Fv(.)d(The)j(resolv)o
+(er)f(should)g(store)i(these)f(v)n(alues.)f(The)h(method)479
+1843 y Fq(rep_encoding)f Fv(should)g(return)g(the)h(internal)g
+(encoding.)p Black 396 1950 a Ft(\225)p Black 60 w Fv(If)g(the)h
+(parser)e(w)o(ants)i(to)f(read)g(from)f(the)h(resolv)o(er)m(,)e(it)j
+(in)m(v)n(ok)o(es)f(the)g(method)f Fq(open_in)p Fv(.)g(Either)h(the)g
+(resolv)o(er)479 2058 y(succeeds,)g(in)g(which)g(case)g(the)h
+Fq(Lexing.lexbuf)d Fv(reading)h(from)g(the)h(\002le)h(or)f(stream)g
+(must)g(be)h(returned,)d(or)479 2166 y(opening)h(f)o(ails.)h(In)g(the)g
+(latter)h(case)f(the)h(method)d(implementation)g(should)h(raise)i(an)f
+(e)o(xception)e(\(see)j(belo)n(w\).)p Black 396 2274
+a Ft(\225)p Black 60 w Fv(If)f(the)h(parser)e(\002nishes)i(reading,)d
+(it)j(calls)g(the)f Fq(close_in)g Fv(method.)p Black
+396 2382 a Ft(\225)p Black 60 w Fv(If)g(the)h(parser)e(\002nds)h(a)h
+(reference)d(to)j(another)e(e)o(xternal)f(entity)i(in)h(the)f(input)f
+(stream,)h(it)h(calls)g Fq(clone)f Fv(to)g(get)h(a)479
+2490 y(second)f(resolv)o(er)f(which)g(must)h(be)h(initially)f(closed)g
+(\(not)f(yet)h(connected)f(with)h(an)g(input)f(stream\).)h(The)g
+(parser)479 2598 y(then)g(in)m(v)n(ok)o(es)f Fq(open_in)h
+Fv(and)f(the)i(other)e(methods)g(as)i(described.)p Black
+396 2706 a Ft(\225)p Black 60 w Fv(If)f(you)g(already)f(kno)n(w)g(the)h
+(character)f(set)i(of)f(the)g(input)g(stream,)f(you)h(should)f(recode)g
+(it)i(to)f(the)g(internal)479 2814 y(encoding,)e(and)i(de\002ne)f(the)i
+(method)d Fq(change_encoding)h Fv(as)i(an)f(empty)f(method.)p
+Black 396 2922 a Ft(\225)p Black 60 w Fv(If)h(you)g(w)o(ant)g(to)g
+(support)f(multiple)h(e)o(xternal)f(character)g(sets,)i(the)f(object)f
+(must)i(follo)n(w)e(a)i(much)e(more)479 3030 y(complicated)g(protocol.)
+f(Directly)i(after)g Fq(open_in)f Fv(has)i(been)e(called,)h(the)g
+(resolv)o(er)f(must)h(return)f(a)i(le)o(xical)f(b)n(uf)n(fer)479
+3138 y(that)h(only)e(reads)h(one)g(byte)f(at)i(a)g(time.)f(This)g(is)h
+(only)f(possible)f(if)i(you)e(create)h(the)g(le)o(xical)g(b)n(uf)n(fer)
+f(with)479 3246 y Fq(Lexing.from_function)p Fv(;)e(the)j(function)d
+(must)j(then)f(al)o(w)o(ays)h(return)e(1)i(if)f(the)h(EOF)g(is)g(not)f
+(yet)h(reached,)e(and)h(0)479 3354 y(if)i(EOF)f(is)h(reached.)e(If)h
+(the)g(parser)g(has)g(read)g(the)g(\002rst)h(line)f(of)g(the)h
+(document,)c(it)k(will)g(in)m(v)n(ok)o(e)479 3461 y Fq(change_encoding)
+e Fv(to)h(tell)h(the)f(resolv)o(er)f(which)h(character)e(set)j(to)g
+(assume.)f(From)f(this)i(moment,)e(the)h(object)479 3569
+y(can)g(return)f(more)h(than)f(one)h(byte)g(at)g(once.)g(The)g(ar)o
+(gument)d(of)j Fq(change_encoding)f Fv(is)i(either)e(the)i(parameter)d
+(of)479 3677 y(the)i("encoding")e(attrib)n(ute)i(of)g(the)g(XML)h
+(declaration,)d(or)i(the)g(empty)f(string)h(if)h(there)e(is)j(not)d(an)
+o(y)h(XML)479 3785 y(declaration)f(or)h(if)g(the)h(declaration)d(does)i
+(not)g(contain)f(an)h(encoding)e(attrib)n(ute.)479 3935
+y(At)j(the)f(be)o(ginning)e(the)i(resolv)o(er)f(must)h(only)g(return)f
+(one)g(character)g(e)n(v)o(ery)g(time)h(something)f(is)i(read)f(from)f
+(the)479 4043 y(le)o(xical)h(b)n(uf)n(fer)-5 b(.)19 b(The)h(reason)f
+(for)h(this)h(is)g(that)f(you)f(otherwise)h(w)o(ould)f(not)h(e)o
+(xactly)g(kno)n(w)f(at)h(which)g(position)f(in)479 4151
+y(the)h(input)g(stream)g(the)g(character)f(set)i(changes.)479
+4300 y(If)f(you)g(w)o(ant)g(automatic)f(recognition)f(of)i(the)g
+(character)f(set,)i(it)g(is)g(up)f(to)g(the)g(resolv)o(er)f(object)h
+(to)g(implement)f(this.)p Black 396 4449 a Ft(\225)p
+Black 60 w Fv(If)h(an)g(error)g(occurs,)f(the)h(parser)g(calls)g(the)h
+(method)d Fq(close_all)i Fv(for)f(the)h(top-le)n(v)o(el)f(resolv)o(er;)
+g(this)i(method)479 4557 y(should)e(close)i(itself)g(\(if)f(not)g
+(already)f(done\))f(and)i(all)h(clones.)396 4748 y Fu(Exceptions.)f
+Fv(It)h(is)g(possible)f(to)g(chain)g(resolv)o(ers)f(such)h(that)g(when)
+g(the)g(\002rst)h(resolv)o(er)e(is)i(not)f(able)g(to)g(open)f(the)396
+4856 y(entity)-5 b(,)20 b(the)g(other)f(resolv)o(ers)g(of)h(the)g
+(chain)g(are)g(tried)g(in)g(turn.)g(The)g(method)e Fq(open_in)i
+Fv(should)f(raise)i(the)f(e)o(xception)p Black 3797 5278
+a Fr(77)p Black eop
+%%Page: 78 78
+78 77 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fq(Not_competent)f
+Fv(to)h(indicate)g(that)g(the)g(ne)o(xt)g(resolv)o(er)f(should)g(try)h
+(to)g(open)f(the)i(entity)-5 b(.)19 b(If)h(the)g(resolv)o(er)f(is)i
+(able)f(to)396 687 y(handle)f(the)i(ID,)f(b)n(ut)g(some)g(other)f
+(error)g(occurs,)g(the)i(e)o(xception)d Fq(Not_resolvable)g
+Fv(should)i(be)g(raised)g(to)g(force)396 795 y(that)h(the)f(chain)f
+(breaks.)396 944 y(Example:)g(Ho)n(w)h(to)h(de\002ne)e(a)i(resolv)o(er)
+e(that)h(is)h(equi)n(v)n(alent)e(to)h(from_string:)e(...)-2
+1314 y Fp(4.2.3.)35 b(Prede\002ned)f(resolver)h(components)396
+1482 y Fv(There)20 b(are)g(some)g(classes)h(in)f(Pxp_reader)e(that)j
+(de\002ne)e(common)g(resolv)o(er)f(beha)n(viour)-5 b(.)396
+1662 y Fq(class)44 b(resolve_read_this_channel)d(:)576
+1759 y(?id:ext_id)i(->)576 1856 y(?fixenc:encoding)f(->)576
+1953 y(?auto_close:bool)g(->)576 2050 y(in_channel)h(->)755
+2147 y(resolver)396 2338 y Fv(Reads)21 b(from)e(the)h(passed)g(channel)
+f(\(it)i(may)f(be)g(e)n(v)o(en)f(a)i(pipe\).)e(If)h(the)g
+Fq(~id)g Fv(ar)o(gument)e(is)j(passed)f(to)h(the)f(object,)f(the)396
+2446 y(created)h(resolv)o(er)f(accepts)h(only)f(this)i(ID.)f(Otherwise)
+g(all)h(IDs)f(are)g(accepted.)f(-)i(Once)f(the)g(resolv)o(er)f(has)h
+(been)396 2554 y(cloned,)f(it)h(does)g(not)f(accept)h(an)o(y)f(ID.)g
+(This)h(means)g(that)g(this)g(resolv)o(er)e(cannot)h(handle)g(inner)g
+(references)f(to)i(e)o(xternal)396 2662 y(entities.)h(Note)f(that)g
+(you)f(can)h(combine)f(this)i(resolv)o(er)e(with)h(another)f(resolv)o
+(er)g(that)h(can)g(handle)f(inner)g(references)396 2770
+y(\(such)h(as)h(resolv)o(e_as_\002le\);)d(see)j(class)g('combine')d
+(belo)n(w)-5 b(.)19 b(-)h(If)g(you)g(pass)g(the)h Fq(~fixenc)e
+Fv(ar)o(gument,)f(the)i(encoding)396 2878 y(of)g(the)g(channel)f(is)i
+(set)g(to)g(the)f(passed)g(v)n(alue,)f(re)o(gardless)g(of)h(an)o(y)f
+(auto-recognition)e(or)j(an)o(y)f(XML)h(declaration.)f(-)h(If)396
+2986 y Fq(~auto_close)43 b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o
+(ault\),)e(the)h(channel)f(is)i(closed)f(after)g(use.)g(If)g
+Fq(~auto_close)43 b(=)396 3094 y(false)p Fv(,)20 b(the)g(channel)f(is)i
+(left)g(open.)396 3315 y Fq(class)44 b(resolve_read_any_channel)d(:)576
+3413 y(?auto_close:bool)h(->)576 3510 y(channel_of_id:\(ext_id)f(->)j
+(\(in_channel)f(*)i(encoding)f(option\)\))f(->)755 3607
+y(resolver)396 3798 y Fv(This)21 b(resolv)o(er)e(calls)h(the)h
+(function)d Fq(~channel_of_id)h Fv(to)h(open)f(a)i(ne)n(w)f(channel)f
+(for)g(the)h(passed)g Fq(ext_id)p Fv(.)g(This)396 3906
+y(function)f(must)h(either)g(return)f(the)h(channel)f(and)h(the)g
+(encoding,)e(or)i(it)g(must)h(f)o(ail)f(with)h(Not_competent.)c(The)396
+4014 y(function)i(must)h(return)f Fq(None)h Fv(as)h(encoding)d(if)j
+(the)f(def)o(ault)f(mechanism)g(to)h(recognize)f(the)h(encoding)e
+(should)h(be)396 4122 y(used.)g(It)i(must)e(return)g
+Fq(Some)44 b(e)20 b Fv(if)g(it)h(is)f(already)f(kno)n(wn)f(that)i(the)g
+(encoding)d(of)j(the)f(channel)g(is)i Fq(e)p Fv(.)e(If)h
+Fq(~auto_close)396 4230 y(=)45 b(true)19 b Fv(\(which)g(is)h(the)f(def)
+o(ault\),)f(the)i(channel)e(is)i(closed)f(after)g(use.)h(If)f
+Fq(~auto_close)43 b(=)h(false)p Fv(,)19 b(the)h(channel)e(is)396
+4337 y(left)j(open.)396 4559 y Fq(class)44 b(resolve_read_url_channel)d
+(:)576 4656 y(?base_url:Neturl.url)g(->)576 4753 y(?auto_close:bool)h
+(->)576 4851 y(url_of_id:\(ext_id)g(->)i(Neturl.url\))f(->)p
+Black 3800 5278 a Fr(78)p Black eop
+%%Page: 79 79
+79 78 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq
+(channel_of_url:\(Neturl.url)40 b(->)45 b(\(in_channel)e(*)h(encoding)g
+(option\)\))f(->)755 676 y(resolver)396 867 y Fv(When)20
+b(this)h(resolv)o(er)e(gets)h(an)h(ID)f(to)g(read)g(from,)f(it)i(calls)
+g(the)f(function)e Fq(~url_of_id)h Fv(to)i(get)f(the)g(corresponding)
+396 975 y(URL.)h(This)f(URL)h(may)f(be)g(a)g(relati)n(v)o(e)g(URL;)h
+(ho)n(we)n(v)o(er)m(,)c(a)k(URL)g(scheme)f(must)g(be)g(used)g(which)f
+(contains)h(a)h(path.)396 1083 y(The)f(resolv)o(er)f(con)m(v)o(erts)g
+(the)h(URL)h(to)f(an)g(absolute)f(URL)i(if)g(necessary)-5
+b(.)19 b(The)g(second)h(function,)396 1191 y Fq(~channel_of_url)p
+Fv(,)e(is)j(fed)f(with)h(the)f(absolute)f(URL)i(as)g(input.)e(This)h
+(function)f(opens)g(the)i(resource)e(to)h(read)396 1299
+y(from,)f(and)h(returns)f(the)h(channel)f(and)h(the)g(encoding)e(of)i
+(the)g(resource.)396 1448 y(Both)g(functions,)f Fq(~url_of_id)g
+Fv(and)h Fq(~channel_of_url)p Fv(,)e(can)i(raise)g(Not_competent)e(to)i
+(indicate)g(that)g(the)396 1556 y(object)g(is)h(not)f(able)g(to)g(read)
+g(from)f(the)h(speci\002ed)g(resource.)f(Ho)n(we)n(v)o(er)m(,)f(there)i
+(is)h(a)f(dif)n(ference:)f(A)h(Not_competent)396 1664
+y(from)f Fq(~url_of_id)g Fv(is)j(left)e(as)h(it)g(is,)g(b)n(ut)f(a)h
+(Not_competent)c(from)i Fq(~channel_of_url)g Fv(is)i(con)m(v)o(erted)c
+(to)396 1772 y(Not_resolv)n(able.)h(So)i(only)g Fq(~url_of_id)f
+Fv(decides)h(which)f(URLs)i(are)f(accepted)g(by)f(the)i(resolv)o(er)e
+(and)g(which)h(not.)396 1921 y(The)g(function)f Fq(~channel_of_url)f
+Fv(must)i(return)f Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o(ault)f
+(mechanism)g(to)i(recognize)396 2029 y(the)f(encoding)f(should)g(be)h
+(used.)g(It)g(must)g(return)f Fq(Some)44 b(e)21 b Fv(if)g(it)f(is)i
+(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i(the)396
+2137 y(channel)f(is)i Fq(e)p Fv(.)396 2287 y(If)f Fq(~auto_close)43
+b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o(ault\),)e(the)h(channel)f
+(is)i(closed)f(after)g(use.)g(If)g Fq(~auto_close)43
+b(=)396 2395 y(false)p Fv(,)20 b(the)g(channel)f(is)i(left)g(open.)396
+2544 y(Objects)f(of)g(this)g(class)h(contain)e(a)h(base)g(URL)g(relati)
+n(v)o(e)f(to)h(which)g(relati)n(v)o(e)f(URLs)h(are)g(interpreted.)e
+(When)i(creating)e(a)396 2652 y(ne)n(w)i(object,)g(you)f(can)h(specify)
+f(the)i(base)f(URL)h(by)f(passing)f(it)i(as)g Fq(~base_url)e
+Fv(ar)o(gument.)f(When)i(an)g(e)o(xisting)396 2760 y(object)g(is)h
+(cloned,)e(the)h(base)g(URL)h(of)f(the)g(clone)g(is)h(the)f(URL)h(of)f
+(the)g(original)f(object.)h(-)g(Note)g(that)g(the)h(term)f("base)396
+2868 y(URL")h(has)f(a)h(strict)g(de\002nition)e(in)h(RFC)i(1808.)396
+3089 y Fq(class)44 b(resolve_read_this_string)d(:)576
+3187 y(?id:ext_id)i(->)576 3284 y(?fixenc:encoding)f(->)576
+3381 y(string)h(->)755 3478 y(resolver)396 3669 y Fv(Reads)21
+b(from)e(the)h(passed)g(string.)g(If)g(the)g Fq(~id)h
+Fv(ar)o(gument)c(is)k(passed)g(to)f(the)g(object,)g(the)g(created)f
+(resolv)o(er)g(accepts)396 3777 y(only)h(this)g(ID.)g(Otherwise)g(all)h
+(IDs)g(are)f(accepted.)f(-)h(Once)g(the)g(resolv)o(er)f(has)i(been)e
+(cloned,)g(it)i(does)f(not)g(accept)g(an)o(y)396 3885
+y(ID.)g(This)h(means)f(that)g(this)h(resolv)o(er)e(cannot)g(handle)g
+(inner)g(references)g(to)h(e)o(xternal)f(entities.)i(Note)f(that)g(you)
+f(can)396 3993 y(combine)g(this)i(resolv)o(er)e(with)h(another)f
+(resolv)o(er)g(that)h(can)g(handle)f(inner)g(references)g(\(such)h(as)h
+(resolv)o(e_as_\002le\);)396 4101 y(see)g(class)g('combine')d(belo)n(w)
+-5 b(.)19 b(-)i(If)f(you)f(pass)i(the)f Fq(~fixenc)f
+Fv(ar)o(gument,)f(the)i(encoding)e(of)i(the)g(string)g(is)h(set)g(to)g
+(the)396 4209 y(passed)f(v)n(alue,)g(re)o(gardless)e(of)i(an)o(y)g
+(auto-recognition)c(or)k(an)o(y)f(XML)i(declaration.)396
+4430 y Fq(class)44 b(resolve_read_any_string)d(:)576
+4527 y(string_of_id:\(ext_id)g(->)k(\(string)e(*)i(encoding)e
+(option\)\))h(->)755 4625 y(resolver)p Black 3800 5278
+a Fr(79)p Black eop
+%%Page: 80 80
+80 79 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(This)h(resolv)o(er)e
+(calls)h(the)h(function)d Fq(~string_of_id)h Fv(to)h(get)g(the)g
+(string)g(for)g(the)g(passed)g Fq(ext_id)p Fv(.)g(This)g(function)396
+687 y(must)g(either)g(return)f(the)i(string)e(and)h(the)g(encoding,)e
+(or)i(it)h(must)f(f)o(ail)h(with)f(Not_competent.)e(The)h(function)g
+(must)396 795 y(return)g Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o
+(ault)g(mechanism)e(to)j(recognize)d(the)i(encoding)f(should)g(be)h
+(used.)g(It)g(must)396 903 y(return)f Fq(Some)44 b(e)21
+b Fv(if)g(it)f(is)i(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i
+(the)g(string)g(is)h Fq(e)p Fv(.)396 1124 y Fq(class)44
+b(resolve_as_file)f(:)576 1222 y(?file_prefix:[)f(`Not_recognized)g(|)j
+(`Allowed)f(|)g(`Required)g(])g(->)576 1319 y(?host_prefix:[)e
+(`Not_recognized)g(|)j(`Allowed)f(|)g(`Required)g(])g(->)576
+1416 y(?system_encoding:encoding)c(->)576 1513 y(?url_of_id:\(ext_id)h
+(->)k(Neturl.url\))e(->)576 1610 y(?channel_of_url:)f(\(Neturl.url)h
+(->)h(\(in_channel)f(*)i(encoding)e(option\)\))h(->)576
+1707 y(unit)g(->)755 1804 y(resolver)396 1995 y Fv(Reads)21
+b(from)e(the)h(local)g(\002le)h(system.)f(Ev)o(ery)f(\002le)i(name)f
+(is)h(interpreted)d(as)j(\002le)g(name)f(of)f(the)i(local)f(\002le)h
+(system,)f(and)396 2103 y(the)g(referred)f(\002le)i(is)g(read.)396
+2253 y(The)f(full)g(form)f(of)h(a)h(\002le)g(URL)g(is:)g
+(\002le://host/path,)e(where)h('host')f(speci\002es)i(the)f(host)g
+(system)g(where)g(the)g(\002le)396 2361 y(identi\002ed)g('path')f
+(resides.)h(host)g(=)g("")h(or)f(host)g(=)h("localhost")e(are)h
+(accepted;)f(other)h(v)n(alues)f(will)i(raise)396 2468
+y(Not_competent.)d(The)i(standard)f(for)g(\002le)i(URLs)g(is)g
+(de\002ned)e(in)i(RFC)g(1738.)396 2618 y(Option)f Fq(~file_prefix)p
+Fv(:)e(Speci\002es)j(ho)n(w)f(the)g("\002le:")h(pre\002x)e(of)h(\002le)
+h(names)f(is)h(handled:)p Black 396 2850 a Ft(\225)p
+Black 60 w Fq(`Not_recognized:)p Fv(The)c(pre\002x)j(is)h(not)f
+(recognized.)p Black 396 2958 a Ft(\225)p Black 60 w
+Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h(allo)n(wed)e(b)n(ut)i(not)f
+(required)e(\(the)i(def)o(ault\).)p Black 396 3066 a
+Ft(\225)p Black 60 w Fq(`Required:)f Fv(The)h(pre\002x)g(is)h
+(required.)396 3257 y(Option)f Fq(~host_prefix:)e Fv(Speci\002es)j(ho)n
+(w)e(the)i("//host")f(phrase)f(of)h(\002le)h(names)f(is)h(handled:)p
+Black 396 3490 a Ft(\225)p Black 60 w Fq(`Not_recognized:)p
+Fv(The)c(pre\002x)j(is)h(not)f(recognized.)p Black 396
+3598 a Ft(\225)p Black 60 w Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h
+(allo)n(wed)e(b)n(ut)i(not)f(required)e(\(the)i(def)o(ault\).)p
+Black 396 3706 a Ft(\225)p Black 60 w Fq(`Required:)f
+Fv(The)h(pre\002x)g(is)h(required.)396 3896 y(Option)f
+Fq(~system_encoding:)e Fv(Speci\002es)i(the)g(encoding)e(of)i(\002le)h
+(names)f(of)g(the)g(local)g(\002le)h(system.)f(Def)o(ault:)396
+4004 y(UTF-8.)396 4154 y(Options)g Fq(~url_of_id)p Fv(,)f
+Fq(~channel_of_url)p Fv(:)f(Not)i(for)g(the)g(casual)g(user!)396
+4376 y Fq(class)44 b(combine)g(:)576 4473 y(?prefer:resolver)e(->)576
+4570 y(resolver)h(list)h(->)755 4667 y(resolver)p Black
+3800 5278 a Fr(80)p Black eop
+%%Page: 81 81
+81 80 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Combines)g(se)n(v)o
+(eral)f(resolv)o(er)g(objects.)h(If)g(a)h(concrete)e(entity)g(with)i
+(an)f Fq(ext_id)g Fv(is)h(to)f(be)g(opened,)f(the)h(combined)396
+687 y(resolv)o(er)f(tries)i(the)f(contained)f(resolv)o(ers)g(in)h(turn)
+g(until)g(a)g(resolv)o(er)f(accepts)h(opening)f(the)h(entity)g(\(i.e.)g
+(it)g(does)g(not)396 795 y(raise)h(Not_competent)c(on)j(open_in\).)396
+944 y(Clones:)h(If)f(the)g('clone')f(method)g(is)i(in)m(v)n(ok)o(ed)d
+(before)h('open_in',)e(all)k(contained)e(resolv)o(ers)g(are)h(cloned)f
+(separately)396 1052 y(and)h(again)f(combined.)f(If)i(the)g('clone')f
+(method)g(is)i(in)m(v)n(ok)o(ed)e(after)g('open_in')f(\(i.e.)i(while)g
+(the)g(resolv)o(er)f(is)i(open\),)396 1160 y(additionally)e(the)h
+(clone)f(of)h(the)h(acti)n(v)o(e)e(resolv)o(er)g(is)i(\003agged)f(as)g
+(being)g(preferred,)d(i.e.)k(it)f(is)i(tried)e(\002rst.)-2
+1662 y Fx(4.3.)39 b(The)g(DTD)g(c)m(lasses)396 1841 y
+Fr(Sorry)-5 b(,)21 b(not)f(yet)g(written.)h(P)-7 b(erhaps)20
+b(the)g(interface)g(de\002nition)e(of)j(Pxp_dtd)d(e)n(xpr)m(esses)j
+(the)f(same:)396 2063 y Fq(\(****************************************)o
+(******)o(******)o(******)o(******)o(*****)o(*\))396
+2160 y(\(*)3048 b(*\))396 2257 y(\(*)45 b(Pxp_dtd:)2643
+b(*\))396 2354 y(\(*)224 b(Object)44 b(model)g(of)g(document)g(type)g
+(declarations)939 b(*\))396 2452 y(\(*)3048 b(*\))396
+2549 y(\(****************************************)o(******)o(******)o
+(******)o(******)o(*****)o(*\))396 2743 y(\(*)45 b
+(======================================)o(======)o(======)o(======)o
+(======)o(=====)o(===)441 2840 y(*)g(OVERVIEW)441 2937
+y(*)441 3034 y(*)g(class)f(dtd)g(...............)e(represents)i(the)g
+(whole)g(DTD,)g(including)f(element)441 3132 y(*)1210
+b(declarations,)43 b(entity)h(declarations,)f(notation)441
+3229 y(*)1210 b(declarations,)43 b(and)h(processing)g(instructions)441
+3326 y(*)h(class)f(dtd_element)f(.......)g(represents)h(an)g(element)g
+(declaration)f(consisting)441 3423 y(*)1210 b(of)45 b(a)g(content)e
+(model)h(and)h(an)f(attribute)f(list)441 3520 y(*)1210
+b(declaration)441 3617 y(*)45 b(class)f(dtd_notation)f(......)g
+(represents)h(a)g(notation)g(declaration)441 3714 y(*)h(class)f
+(proc_instruction)e(..)i(represents)g(a)g(processing)f(instruction)441
+3811 y(*)i(======================================)o(======)o(======)o
+(======)o(======)o(=====)o(===)441 3909 y(*)441 4006
+y(*\))396 4297 y(class)f(dtd)h(:)486 4394 y(\(*)f(Creation:)531
+4491 y(*)134 b(new)44 b(dtd)531 4589 y(*)g(creates)g(a)h(new,)f(empty)g
+(DTD)g(object)g(without)g(any)g(declaration,)f(without)g(a)i(root)531
+4686 y(*)f(element,)g(without)g(an)g(ID.)531 4783 y(*\))p
+Black 3800 5278 a Fr(81)p Black eop
+%%Page: 82 82
+82 81 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 486 579 a Fq
+(Pxp_types.collect_warnings)40 b(-)p Fo(>)486 676 y Fq
+(Pxp_types.rep_encoding)h(-)p Fo(>)486 773 y Fq(object)576
+870 y(method)i(root)i(:)f(string)g(option)665 967 y(\(*)h(get)f(the)g
+(name)h(of)f(the)g(root)h(element)e(if)i(present)e(*\))576
+1162 y(method)g(set_root)h(:)h(string)e(-)p Fo(>)i Fq(unit)665
+1259 y(\(*)g(set)f(the)g(name)h(of)f(the)g(root)h(element.)e(This)h
+(method)g(can)g(be)h(invoked)710 1356 y(*)g(only)f(once)710
+1453 y(*\))576 1647 y(method)f(id)i(:)g(Pxp_types.dtd_id)d(option)665
+1745 y(\(*)j(get)f(the)g(identifier)g(for)g(this)g(DTD)g(*\))576
+1939 y(method)f(set_id)h(:)h(Pxp_types.dtd_id)d(-)p Fo(>)i
+Fq(unit)665 2036 y(\(*)h(set)f(the)g(identifier.)f(This)i(method)e(can)
+i(be)f(invoked)g(only)g(once)g(*\))576 2230 y(method)f(encoding)h(:)h
+(Pxp_types.rep_encoding)665 2327 y(\(*)g(returns)e(the)i(encoding)e
+(used)h(for)h(character)e(representation)g(*\))576 2619
+y(method)g(allow_arbitrary)g(:)h(unit)665 2716 y(\(*)h(After)f(this)g
+(method)g(has)g(been)g(invoked,)g(the)g(ob-)396 2813
+y(ject)g(changes)g(its)g(behaviour:)710 2910 y(*)h(-)f(elements)g(and)g
+(notations)g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h
+(an)710 3007 y(*)134 b(arbitrary)44 b(way;)g(the)g(methods)g("element")
+f(and)i("notation")e(indicate)g(this)710 3104 y(*)134
+b(by)45 b(raising)f(Undeclared)f(instead)g(of)i(Validation_error.)710
+3202 y(*\))576 3396 y(method)e(disallow_arbitrary)f(:)j(unit)576
+3590 y(method)e(arbitrary_allowed)f(:)j(bool)665 3687
+y(\(*)g(Returns)e(whether)h(arbitrary)f(contents)h(are)g(allowed)g(or)g
+(not.)h(*\))576 3882 y(method)e(standalone_declaration)f(:)i(bool)665
+3979 y(\(*)h(Whether)e(there)h(is)h(a)g('standalone')d(declaration)h
+(or)i(not.)f(Strictly)710 4076 y(*)h(speaking,)e(this)h(declaration)f
+(is)i(not)f(part)g(of)h(the)f(DTD,)g(but)h(it)f(is)710
+4173 y(*)h(included)e(here)h(because)g(of)h(practical)e(reasons.)710
+4270 y(*)i(If)f(not)h(set,)f(this)g(property)f(defaults)h(to)g
+('false'.)710 4367 y(*\))576 4561 y(method)f
+(set_standalone_declaration)e(:)k(bool)f(-)p Fo(>)g Fq(unit)665
+4659 y(\(*)h(Sets)f(the)g('standalone')f(declaration.)g(*\))p
+Black 3800 5278 a Fr(82)p Black eop
+%%Page: 83 83
+83 82 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(add_element)g(:)i(dtd_element)e(-)p Fo(>)h Fq(unit)665
+676 y(\(*)h(add)f(the)g(given)g(element)g(declaration)f(to)i(this)f
+(DTD.)g(Raises)g(Not_found)710 773 y(*)h(if)f(there)g(is)h(already)e
+(an)i(element)f(declaration)f(with)h(the)g(same)g(name.)710
+870 y(*\))576 1065 y(method)f(add_gen_entity)g(:)i(Pxp_entity.entity)d
+(-)p Fo(>)i Fq(bool)g(-)p Fo(>)g Fq(unit)665 1162 y(\(*)h
+(add_gen_entity)d(e)j(extdecl:)710 1259 y(*)g(add)f(the)g(entity)g('e')
+h(as)f(general)g(entity)g(to)g(this)g(DTD)h(\(general)e(entities)710
+1356 y(*)i(are)f(those)g(represented)f(by)i(&name;\).)e(If)i(there)f
+(is)g(already)g(a)g(declaration)710 1453 y(*)h(with)f(the)g(same)g
+(name,)g(the)h(second)f(definition)f(is)h(ignored;)g(as)g(excep-)396
+1550 y(tion)g(from)710 1647 y(*)h(this)f(rule,)g(entities)f(with)i
+(names)f("lt",)g("gt",)g("amp",)f("quot",)h(and)g("apos")710
+1745 y(*)h(may)f(only)g(be)h(redeclared)e(with)h(a)h(definition)e(that)
+h(is)h(equivalent)e(to)h(the)710 1842 y(*)h(standard)e(definition;)g
+(otherwise)h(a)g(Validation_error)e(is)j(raised.)710
+1939 y(*)710 2036 y(*)g('extdecl':)e('true')h(indicates)f(that)h(the)h
+(entity)e(declaration)g(occurs)h(in)710 2133 y(*)h(an)f(external)g
+(entity.)f(\(Used)h(for)h(the)f(standalone)f(check.\))710
+2230 y(*\))576 2424 y(method)g(add_par_entity)g(:)i(Pxp_entity.entity)d
+(-)p Fo(>)i Fq(unit)665 2522 y(\(*)h(add)f(the)g(given)g(entity)g(as)h
+(parameter)e(entity)h(to)g(this)h(DTD)f(\(parameter)710
+2619 y(*)h(entities)e(are)i(those)f(represented)f(by)h(\045name;\).)g
+(If)g(there)g(is)h(already)e(a)710 2716 y(*)i(declaration)e(with)h(the)
+g(same)g(name,)g(the)h(second)f(definition)f(is)h(ignored.)710
+2813 y(*\))576 3007 y(method)f(add_notation)g(:)i(dtd_notation)e(-)p
+Fo(>)h Fq(unit)665 3104 y(\(*)h(add)f(the)g(given)g(notation)g(to)g
+(this)h(DTD.)f(If)g(there)g(is)h(al-)396 3202 y(ready)f(a)h
+(declaration)710 3299 y(*)g(with)f(the)g(same)g(name,)g(a)h
+(Validation_error)d(is)j(raised.)710 3396 y(*\))576 3590
+y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p Fo(>)j
+Fq(unit)665 3687 y(\(*)g(add)f(the)g(given)g(processing)g(instruction)f
+(to)h(this)g(DTD.)g(*\))576 3882 y(method)f(element)h(:)h(string)f(-)p
+Fo(>)g Fq(dtd_element)665 3979 y(\(*)h(looks)f(up)g(the)h(element)e
+(declaration)g(with)h(the)h(given)f(name.)g(Raises)710
+4076 y(*)h(Validation_error)d(if)i(the)h(element)e(can-)396
+4173 y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710
+4270 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised)
+f(instead.\))710 4367 y(*\))576 4561 y(method)f(element_names)g(:)i
+(string)f(list)665 4659 y(\(*)h(returns)e(the)i(list)f(of)g(the)h
+(names)f(of)g(all)h(element)e(declarations.)g(*\))576
+4853 y(method)g(gen_entity)h(:)g(string)g(-)p Fo(>)g
+Fq(\(Pxp_entity.entity)e(*)j(bool\))p Black 3800 5278
+a Fr(83)p Black eop
+%%Page: 84 84
+84 83 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45
+b(let)f(e,)h(extdecl)e(=)i(obj)f(#)h(gen_entity)e(n:)710
+676 y(*)i(looks)f(up)g(the)h(general)e(entity)h('e')g(with)h(the)f
+(name)g('n'.)g(Raises)710 773 y(*)h(WF_error)e(if)i(the)f(entity)g
+(cannot)g(be)g(found.)710 870 y(*)h('extdecl':)e(indicates)g(whether)h
+(the)g(entity)g(declaration)f(occured)h(in)g(an)710 967
+y(*)h(external)e(entity.)710 1065 y(*\))576 1259 y(method)g
+(gen_entity_names)g(:)h(string)g(list)665 1356 y(\(*)h(returns)e(the)i
+(list)f(of)g(all)h(general)e(entity)h(names)g(*\))576
+1550 y(method)f(par_entity)h(:)g(string)g(-)p Fo(>)g
+Fq(Pxp_entity.entity)665 1647 y(\(*)h(looks)f(up)g(the)h(parameter)e
+(entity)h(with)g(the)g(given)g(name.)g(Raises)710 1745
+y(*)h(WF_error)e(if)i(the)f(entity)g(cannot)g(be)g(found.)710
+1842 y(*\))576 2036 y(method)f(par_entity_names)g(:)h(string)g(list)665
+2133 y(\(*)h(returns)e(the)i(list)f(of)g(all)h(parameter)e(entity)h
+(names)g(*\))576 2327 y(method)f(notation)h(:)h(string)e(-)p
+Fo(>)i Fq(dtd_notation)665 2424 y(\(*)g(looks)f(up)g(the)h(notation)e
+(declaration)g(with)h(the)h(given)f(name.)g(Raises)710
+2522 y(*)h(Validation_error)d(if)i(the)h(notation)e(can-)396
+2619 y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710
+2716 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised)
+f(instead.\))710 2813 y(*\))576 3007 y(method)f(notation_names)g(:)i
+(string)e(list)665 3104 y(\(*)i(Returns)e(the)i(list)f(of)g(the)h
+(names)f(of)g(all)h(added)f(notations)f(*\))576 3299
+y(method)g(pinstr)h(:)h(string)f(-)p Fo(>)g Fq(proc_instruction)e(list)
+665 3396 y(\(*)j(looks)f(up)g(all)h(processing)e(instructions)g(with)h
+(the)g(given)g(target.)710 3493 y(*)h(The)f("target")g(is)g(the)g
+(identifier)g(following)f(")p Fo(<)p Fq(?".)710 3590
+y(*)i(Note:)f(It)g(is)h(not)f(possible)g(to)g(find)g(out)h(the)f(exact)
+g(position)f(of)i(the)710 3687 y(*)g(processing)e(instruction.)710
+3784 y(*\))576 3979 y(method)g(pinstr_names)g(:)i(string)f(list)665
+4076 y(\(*)h(Returns)e(the)i(list)f(of)g(the)h(names)f(\(targets\))f
+(of)i(all)f(added)g(pinstrs)f(*\))576 4270 y(method)g(validate)h(:)h
+(unit)665 4367 y(\(*)g(ensures)e(that)i(the)f(DTD)g(is)h(valid.)f(This)
+g(method)g(is)g(optimized)f(such)h(that)710 4464 y(*)h(actual)f
+(validation)f(is)h(only)g(performed)g(if)g(DTD)h(has)f(changed.)710
+4561 y(*)h(If)f(the)h(DTD)f(is)g(invalid,)g(mostly)g(a)g
+(Validation_error)f(is)h(raised,)710 4659 y(*)h(but)f(other)g
+(exceptions)f(are)i(possible,)e(too.)710 4756 y(*\))p
+Black 3800 5278 a Fr(84)p Black eop
+%%Page: 85 85
+85 84 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(only_deterministic_models)e(:)k(unit)665 676 y(\(*)g(Succeeds)e(if)i
+(all)f(regexp)g(content)g(models)f(are)i(deterministic.)710
+773 y(*)g(Otherwise)e(Validation_error.)710 870 y(*\))576
+1065 y(method)g(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(bool)f(-)396
+1162 y Fo(>)h Fq(unit)665 1259 y(\(*)g(write_compact_as_latin1)c(os)j
+(enc)h(doctype:)710 1356 y(*)g(Writes)f(the)g(DTD)g(as)h('enc'-encoded)
+d(string)i(to)h('os'.)f(If)g('doctype',)f(a)710 1453
+y(*)i(DTD)f(like)g Fo(<)p Fq(!DOCTYPE)f(root)i([)f(...)h(])p
+Fo(>)f Fq(is)g(written.)g(If)g('not)h(doctype',)710 1550
+y(*)g(only)f(the)g(declarations)f(are)h(written)g(\(the)g(material)g
+(within)g(the)710 1647 y(*)h(square)f(brackets\).)710
+1745 y(*\))576 1939 y(method)f(write_compact_as_latin1)e(:)k
+(Pxp_types.output_stream)c(-)p Fo(>)j Fq(bool)h(-)p Fo(>)f
+Fq(unit)665 2036 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)h(to)h
+(keep)f(compatibility)f(with)710 2133 y(*)i(older)f(versions)f(of)i
+(the)f(parser)710 2230 y(*\))576 2522 y
+(\(*---------------------------*\))576 2619 y(method)f(invalidate)h(:)g
+(unit)665 2716 y(\(*)h(INTERNAL)e(METHOD)h(*\))576 2813
+y(method)f(warner)h(:)h(Pxp_types.collect_warnings)665
+2910 y(\(*)g(INTERNAL)e(METHOD)h(*\))486 3007 y(end)396
+3396 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))396 3590 y(and)45 b(dtd_element)e(:)h(dtd)h(-)p
+Fo(>)f Fq(string)g(-)p Fo(>)486 3687 y Fq(\(*)g(Creation:)531
+3784 y(*)134 b(new)44 b(dtd_element)f(init_dtd)h(init_name:)531
+3882 y(*)g(creates)g(a)h(new)f(dtd_element)f(object)h(for)g(init_dtd)g
+(with)g(init_name.)531 3979 y(*)g(The)h(strings)e(are)i(represented)e
+(in)h(the)h(same)f(encoding)f(as)i(init_dtd.)531 4076
+y(*\))486 4173 y(object)576 4367 y(method)e(name)i(:)f(string)665
+4464 y(\(*)h(returns)e(the)i(name)f(of)g(the)h(declared)e(element)h
+(*\))576 4659 y(method)f(externally_declared)f(:)j(bool)665
+4756 y(\(*)g(returns)e(whether)h(the)g(element)g(declaration)f(occurs)h
+(in)g(an)h(external)710 4853 y(*)g(entity.)p Black 3800
+5278 a Fr(85)p Black eop
+%%Page: 86 86
+86 85 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 710 579 a Fq(*\))576
+773 y(method)43 b(content_model)g(:)i(Pxp_types.content_model_type)665
+870 y(\(*)g(get)f(the)g(content)g(model)g(of)h(this)f(element)f
+(declaration,)g(or)i(Unspecified)e(*\))576 1065 y(method)g(content_dfa)
+g(:)i(Pxp_dfa.dfa_definition)c(option)665 1162 y(\(*)k(return)f(the)g
+(DFA)g(of)h(the)f(content)g(model)g(if)g(there)g(is)h(a)f(DFA,)h(or)f
+(None.)710 1259 y(*)h(A)f(DFA)h(exists)f(only)g(for)g(regexp)g(style)g
+(content)g(models)f(which)h(are)710 1356 y(*)h(deterministic.)710
+1453 y(*\))576 1647 y(method)e(set_cm_and_extdecl)f(:)j
+(Pxp_types.content_model_type)40 b(-)p Fo(>)k Fq(bool)h(-)p
+Fo(>)f Fq(unit)665 1745 y(\(*)h(set_cm_and_extdecl)d(cm)i(extdecl:)710
+1842 y(*)h(set)f(the)g(content)g(model)g(to)h('cm'.)f(Once)g(the)g
+(content)g(model)g(is)g(not)710 1939 y(*)h(Unspecified,)e(it)h(cannot)g
+(be)g(set)h(to)f(a)h(different)e(value)h(again.)710 2036
+y(*)h(Furthermore,)e(it)h(is)h(set)f(whether)g(the)g(element)g(occurs)f
+(in)i(an)f(external)710 2133 y(*)h(entity)f(\('extdecl'\).)710
+2230 y(*\))576 2424 y(method)f(encoding)h(:)h(Pxp_types.rep_encoding)
+665 2522 y(\(*)g(Return)f(the)g(encoding)f(of)i(the)f(strings)g(*\))576
+2716 y(method)f(allow_arbitrary)g(:)h(unit)665 2813 y(\(*)h(After)f
+(this)g(method)g(has)g(been)g(invoked,)g(the)g(ob-)396
+2910 y(ject)g(changes)g(its)g(behaviour:)710 3007 y(*)h(-)f(attributes)
+g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h(an)710
+3104 y(*)134 b(arbitrary)44 b(way;)g(the)g(method)g("attribute")f
+(indicates)g(this)710 3202 y(*)134 b(by)45 b(raising)f(Undeclared)f
+(instead)g(of)i(Validation_error.)710 3299 y(*\))576
+3493 y(method)e(disallow_arbitrary)f(:)j(unit)576 3687
+y(method)e(arbitrary_allowed)f(:)j(bool)665 3784 y(\(*)g(Returns)e
+(whether)h(arbitrary)f(attributes)h(are)g(allowed)g(or)g(not.)g(*\))576
+3979 y(method)f(attribute)h(:)g(string)g(-)p Fo(>)1517
+4076 y Fq(Pxp_types.att_type)e(*)j(Pxp_types.att_default)665
+4173 y(\(*)g(get)f(the)g(type)h(and)f(default)g(value)g(of)g(a)h
+(declared)e(attribute,)g(or)i(raise)710 4270 y(*)g(Validation_error)d
+(if)i(the)h(attribute)e(does)h(not)h(exist.)710 4367
+y(*)g(If)f('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g
+(raised)g(instead)710 4464 y(*)h(of)f(Validation_error.)710
+4561 y(*\))576 4756 y(method)f
+(attribute_violates_standalone_declaration)38 b(:)1069
+4853 y(string)44 b(-)p Fo(>)g Fq(string)g(option)g(-)p
+Fo(>)g Fq(bool)p Black 3798 5278 a Fr(86)p Black eop
+%%Page: 87 87
+87 86 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45
+b(attribute_violates_standalone_declarat)o(ion)39 b(name)44
+b(v:)710 676 y(*)h(Checks)f(whether)f(the)i(attribute)e('name')h
+(violates)f(the)i("standalone")710 773 y(*)g(declaration)e(if)h(it)h
+(has)f(value)g('v'.)710 870 y(*)h(The)f(method)g(returns)g(true)g(if:)
+710 967 y(*)h(-)f(The)h(attribute)e(declaration)g(occurs)h(in)g(an)h
+(external)e(entity,)710 1065 y(*)i(and)f(if)h(one)f(of)g(the)h(two)f
+(conditions)f(holds:)710 1162 y(*)i(-)f(v)h(=)g(None,)f(and)g(there)g
+(is)h(a)f(default)g(for)g(the)h(attribute)e(value)710
+1259 y(*)i(-)f(v)h(=)g(Some)f(s,)g(and)h(the)f(type)g(of)h(the)f
+(attribute)f(is)i(not)f(CDATA,)710 1356 y(*)134 b(and)45
+b(s)f(changes)g(if)h(normalized)e(according)g(to)i(the)f(rules)g(of)g
+(the)710 1453 y(*)134 b(attribute)44 b(type.)710 1550
+y(*)710 1647 y(*)h(The)f(method)g(raises)g(Validation_error)e(if)i(the)
+h(attribute)e(does)h(not)g(exist.)710 1745 y(*)h(If)f
+('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g(raised)g
+(instead)710 1842 y(*)h(of)f(Validation_error.)710 1939
+y(*\))576 2133 y(method)f(attribute_names)g(:)h(string)g(list)665
+2230 y(\(*)h(get)f(the)g(list)h(of)f(all)g(declared)g(attributes)f(*\))
+576 2424 y(method)g(names_of_required_attributes)e(:)j(string)g(list)
+665 2522 y(\(*)h(get)f(the)g(list)h(of)f(all)g(attributes)g(that)g(are)
+g(specified)f(as)i(required)710 2619 y(*)g(attributes)710
+2716 y(*\))576 2910 y(method)e(id_attribute_name)f(:)j(string)f(option)
+665 3007 y(\(*)h(Returns)e(the)i(name)f(of)g(the)h(attribute)e(with)h
+(type)g(ID,)h(or)f(None.)g(*\))576 3202 y(method)f
+(idref_attribute_names)f(:)i(string)g(list)665 3299 y(\(*)h(Returns)e
+(the)i(names)f(of)g(the)h(attributes)e(with)h(type)g(IDREF)g(or)h
+(IDREFS.)e(*\))576 3493 y(method)g(add_attribute)g(:)i(string)f(-)p
+Fo(>)1607 3590 y Fq(Pxp_types.att_type)e(-)p Fo(>)531
+3687 y Fq(Pxp_types.att_default)f(-)p Fo(>)531 3784 y
+Fq(bool)j(-)p Fo(>)620 3882 y Fq(unit)665 3979 y(\(*)h(add_attribute)d
+(name)j(type)f(default)f(extdecl:)710 4076 y(*)i(add)f(an)h(attribute)e
+(declaration)g(for)h(an)h(attribute)e(with)h(the)h(given)e(name,)710
+4173 y(*)i(type,)f(and)g(default)g(value.)g(If)g(there)g(is)h(more)f
+(than)g(one)g(declaration)f(for)710 4270 y(*)i(an)f(attribute)g(name,)g
+(the)g(first)g(declara-)396 4367 y(tion)g(counts;)g(the)g(other)g
+(declarations)710 4464 y(*)h(are)f(ignored.)710 4561
+y(*)h('extdecl':)e(if)h(true,)g(the)h(attribute)e(declaration)g(occurs)
+h(in)g(an)h(external)710 4659 y(*)g(entity.)e(This)i(property)e(is)i
+(used)f(to)g(check)g(the)h("standalone")d(attribute.)710
+4756 y(*\))p Black 3797 5278 a Fr(87)p Black eop
+%%Page: 88 88
+88 87 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43
+b(validate)h(:)h(unit)665 676 y(\(*)g(checks)f(whether)f(this)h
+(element)g(declaration)f(\(i.e.)h(the)g(content)g(model)g(and)710
+773 y(*)h(all)f(attribute)f(declarations\))g(is)i(valid)f(for)g(the)g
+(associated)f(DTD.)710 870 y(*)i(Raises)f(mostly)f(Validation_error)g
+(if)h(the)g(validation)g(fails.)710 967 y(*\))576 1162
+y(method)f(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+1259 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+1356 y(*)h(Writes)f(the)g Fo(<)p Fq(!ELEMENT)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')h(as)f('enc'-)396 1453
+y(encoded)g(string.)710 1550 y(*\))576 1745 y(method)f
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 1842 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 1939 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2036 y(*\))486 2133 y(end)396
+2327 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))396 2522 y(and)45 b(dtd_notation)d(:)j(string)f(-)p
+Fo(>)g Fq(Pxp_types.ext_id)e(-)p Fo(>)j Fq(Pxp_types.rep_encoding)c(-)p
+Fo(>)486 2619 y Fq(\(*)j(Creation:)531 2716 y(*)179 b(new)44
+b(dtd_notation)f(a_name)h(an_external_ID)e(init_encoding)531
+2813 y(*)i(creates)g(a)h(new)f(dtd_notation)f(object)h(with)g(the)g
+(given)g(name)g(and)h(the)f(given)531 2910 y(*)g(external)g(ID.)531
+3007 y(*\))486 3104 y(object)576 3202 y(method)f(name)i(:)f(string)576
+3299 y(method)f(ext_id)h(:)h(Pxp_types.ext_id)576 3396
+y(method)e(encoding)h(:)h(Pxp_types.rep_encoding)576
+3590 y(method)e(write)h(:)h(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665
+3687 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710
+3784 y(*)h(Writes)f(the)g Fo(<)p Fq(!NOTATION)f(...)h
+Fo(>)h Fq(declaration)e(to)h('os')g(as)h('enc'-encoded)710
+3882 y(*)g(string.)710 3979 y(*\))576 4173 y(method)e
+(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 4270 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 4367 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 4464 y(*\))486 4659 y(end)396
+4853 y(\(*)h(--------------------------------------)o(------)o(---)39
+b(*\))p Black 3800 5278 a Fr(88)p Black eop
+%%Page: 89 89
+89 88 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 676 a Fq(and)45
+b(proc_instruction)d(:)i(string)g(-)p Fo(>)h Fq(string)e(-)p
+Fo(>)i Fq(Pxp_types.rep_encoding)c(-)p Fo(>)486 773 y
+Fq(\(*)j(Creation:)531 870 y(*)134 b(new)44 b(proc_instruction)f
+(a_target)g(a_value)531 967 y(*)h(creates)g(a)h(new)f(proc_instruction)
+e(object)i(with)g(the)h(given)f(target)f(string)h(and)531
+1065 y(*)g(the)h(given)f(value)g(string.)531 1162 y(*)g(Note:)g(A)h
+(processing)e(instruction)g(is)i(written)e(as)i Fo(<)p
+Fq(?target)e(value?)p Fo(>)p Fq(.)531 1259 y(*\))486
+1356 y(object)576 1453 y(method)g(target)h(:)h(string)576
+1550 y(method)e(value)h(:)h(string)576 1647 y(method)e(encoding)h(:)h
+(Pxp_types.rep_encoding)576 1842 y(method)e(write)h(:)h
+(Pxp_types.output_stream)c(-)p Fo(>)j Fq(Pxp_types.encoding)e(-)p
+Fo(>)j Fq(unit)665 1939 y(\(*)g(write)f(os)g(enc:)710
+2036 y(*)h(Writes)f(the)g Fo(<)p Fq(?...?)p Fo(>)f Fq(PI)i(to)f('os')h
+(as)f('enc'-encoded)f(string.)710 2133 y(*\))576 2327
+y(method)g(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p
+Fo(>)j Fq(unit)665 2424 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)
+h(to)h(keep)f(compatibility)f(with)710 2522 y(*)i(older)f(versions)f
+(of)i(the)f(parser)710 2619 y(*\))576 2813 y(method)f(parse_pxp_option)
+g(:)h(\(string)g(*)h(string)e(*)i(\(string)f(*)g(string\))g(list\))665
+2910 y(\(*)h(Parses)f(a)g(PI)h(containing)e(a)i(PXP)f(option.)g(Such)g
+(PIs)g(are)g(formed)g(like:)710 3007 y(*)134 b Fo(<)p
+Fq(?target)44 b(option-name)f(option-att="value")f(option-att="value")f
+(...)k(?)p Fo(>)710 3104 y Fq(*)g(The)f(method)g(returns)g(a)g(triple)
+710 3202 y(*)134 b(\(target,)44 b(option-name,)f([option-att,)g(value;)
+g(...]\))710 3299 y(*)i(or)f(raises)g(Error.)710 3396
+y(*\))486 3590 y(end)396 3784 y(;;)-2 4286 y Fx(4.4.)39
+b(In)-6 b(v)l(oking)38 b(the)h(par)n(ser)396 4466 y Fv(Here)20
+b(a)h(description)e(of)h(Pxp_yacc.)-2 4794 y Fp(4.4.1.)35
+b(Defaults)p Black 3800 5278 a Fr(89)p Black eop
+%%Page: 90 90
+90 89 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(follo)n(wing)f
+(def)o(aults)g(are)i(a)n(v)n(ailable:)396 759 y Fq(val)45
+b(default_config)d(:)j(config)396 856 y(val)g(default_extension)d(:)i
+(\('a)h(node)f(extension\))f(as)h('a)396 953 y(val)h(default_spec)d(:)j
+(\('a)f(node)h(extension)e(as)h('a\))h(spec)-2 1406 y
+Fp(4.4.2.)35 b(P)l(ar)n(sing)f(functions)396 1574 y Fv(In)20
+b(the)g(follo)n(wing,)f(the)h(term)g("closed)g(document")e(refers)h(to)
+i(an)f(XML)g(structure)f(lik)o(e)396 1754 y Fo(<)p Fq(!DOCTYPE)43
+b(...)i([)f Fn(declarations)f Fq(])i Fo(>)396 1851 y(<)p
+Fn(root)p Fo(>)396 1948 y Fq(...)396 2045 y Fo(<)p Fq(/)p
+Fn(root)p Fo(>)396 2236 y Fv(The)20 b(term)g("fragment")e(refers)i(to)g
+(an)g(XML)h(structure)e(lik)o(e)396 2416 y Fo(<)p Fn(root)p
+Fo(>)396 2513 y Fq(...)396 2611 y Fo(<)p Fq(/)p Fn(root)p
+Fo(>)396 2802 y Fv(i.e.)h(only)g(to)g(one)g(isolated)g(element)f
+(instance.)396 3023 y Fq(val)45 b(parse_dtd_entity)d(:)i(config)g(->)h
+(source)f(->)g(dtd)396 3214 y Fv(P)o(arses)21 b(the)f(declarations)f
+(which)h(are)g(contained)e(in)j(the)f(entity)-5 b(,)19
+b(and)h(returns)f(them)h(as)h Fq(dtd)f Fv(object.)396
+3436 y Fq(val)45 b(extract_dtd_from_document_entity)39
+b(:)45 b(config)f(->)g(source)g(->)g(dtd)396 3627 y Fv(Extracts)20
+b(the)g(DTD)h(from)e(a)h(closed)g(document.)e(Both)i(the)h(internal)e
+(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted)d(and)396
+3735 y(combined)g(to)i(one)f Fq(dtd)h Fv(object.)f(This)h(function)e
+(does)h(not)h(parse)f(the)h(whole)f(document,)f(b)n(ut)i(only)e(the)i
+(parts)g(that)g(are)396 3843 y(necessary)g(to)g(e)o(xtract)f(the)i
+(DTD.)396 4064 y Fq(val)45 b(parse_document_entity)c(:)576
+4161 y(?transform_dtd:\(dtd)g(->)k(dtd\))f(->)576 4259
+y(?id_index:\('ext)e(index\))i(->)576 4356 y(config)f(->)576
+4453 y(source)g(->)576 4550 y('ext)h(spec)g(->)755 4647
+y('ext)g(document)p Black 3800 5278 a Fr(90)p Black eop
+%%Page: 91 91
+91 90 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(P)o(arses)h(a)g(closed)
+e(document)g(and)g(v)n(alidates)h(it)h(against)e(the)i(DTD)f(that)g(is)
+h(contained)e(in)h(the)h(document)d(\(internal)396 687
+y(and)i(e)o(xternal)f(subsets\).)h(The)g(option)f Fq(~transform_dtd)f
+Fv(can)i(be)g(used)g(to)g(transform)f(the)h(DTD)h(in)f(the)g(document,)
+396 795 y(and)g(to)g(use)h(the)f(transformed)e(DTD)i(for)g(v)n
+(alidation.)e(If)i Fq(~id_index)g Fv(is)h(speci\002ed,)e(an)h(inde)o(x)
+f(of)h(all)h(ID)f(attrib)n(utes)h(is)396 903 y(created.)396
+1124 y Fq(val)45 b(parse_wfdocument_entity)c(:)576 1222
+y(config)i(->)576 1319 y(source)g(->)576 1416 y('ext)h(spec)g(->)755
+1513 y('ext)g(document)396 1704 y Fv(P)o(arses)21 b(a)g(closed)e
+(document,)f(b)n(ut)j(checks)e(it)i(only)e(on)h(well-formedness.)396
+1926 y Fq(val)45 b(parse_content_entity)86 b(:)576 2023
+y(?id_index:\('ext)42 b(index\))i(->)576 2120 y(config)f(->)576
+2217 y(source)g(->)576 2314 y(dtd)h(->)576 2411 y('ext)g(spec)g(->)755
+2508 y('ext)g(node)396 2699 y Fv(P)o(arses)21 b(a)g(fragment,)d(and)h
+(v)n(alidates)h(the)g(element.)396 2921 y Fq(val)45 b
+(parse_wfcontent_entity)c(:)576 3018 y(config)i(->)576
+3115 y(source)g(->)576 3212 y('ext)h(spec)g(->)755 3310
+y('ext)g(node)396 3500 y Fv(P)o(arses)21 b(a)g(fragment,)d(b)n(ut)i
+(checks)g(it)g(only)g(on)g(well-formedness.)-2 3870 y
+Fp(4.4.3.)35 b(Con\002guration)f(options)396 4110 y Fq(type)44
+b(config)g(=)576 4207 y({)g(warner)g(:)h(collect_warnings;)665
+4304 y(errors_with_line_numbers)c(:)k(bool;)665 4401
+y(enable_pinstr_nodes)d(:)j(bool;)665 4499 y(enable_super_root_node)c
+(:)k(bool;)665 4596 y(enable_comment_nodes)d(:)i(bool;)665
+4693 y(encoding)g(:)g(rep_encoding;)665 4790 y
+(recognize_standalone_declaration)c(:)k(bool;)p Black
+3800 5278 a Fr(91)p Black eop
+%%Page: 92 92
+92 91 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 665 579 a Fq
+(store_element_positions)41 b(:)k(bool;)665 676 y(idref_pass)e(:)i
+(bool;)665 773 y(validate_by_dfa)e(:)h(bool;)665 870
+y(accept_only_deterministic_models)c(:)k(bool;)665 967
+y(...)576 1065 y(})p Black 396 1422 a Ft(\225)p Black
+60 w Fq(warner:)p Fv(The)19 b(parser)h(prints)f(w)o(arnings)h(by)f(in)m
+(v)n(oking)f(the)j(method)d Fq(warn)j Fv(for)e(this)i(w)o(arner)e
+(object.)h(\(Def)o(ault:)f(all)479 1530 y(w)o(arnings)h(are)g
+(dropped\))p Black 396 1637 a Ft(\225)p Black 60 w Fq
+(errors_with_line_numbers:)p Fv(If)c(true,)k(errors)f(contain)g(line)i
+(numbers;)d(if)j(f)o(alse,)f(errors)g(contain)f(only)g(byte)479
+1745 y(positions.)h(The)g(latter)g(mode)f(is)i(f)o(aster)-5
+b(.)21 b(\(Def)o(ault:)e(true\))p Black 396 1853 a Ft(\225)p
+Black 60 w Fq(enable_pinstr_nodes:)p Fv(If)e(true,)j(the)g(parser)f
+(creates)i(e)o(xtra)e(nodes)g(for)h(processing)f(instructions.)g(If)h
+(f)o(alse,)479 1961 y(processing)f(instructions)g(are)h(simply)g(added)
+f(to)i(the)f(element)f(or)h(document)f(surrounding)e(the)j
+(instructions.)479 2069 y(\(Def)o(ault:)g(f)o(alse\))p
+Black 396 2177 a Ft(\225)p Black 60 w Fq(enable_super_root_node:)p
+Fv(If)c(true,)k(the)g(parser)g(creates)g(an)g(e)o(xtra)g(node)f(which)g
+(is)j(the)e(parent)f(of)h(the)g(root)479 2285 y(of)g(the)g(document)f
+(tree.)h(This)g(node)f(is)i(called)f(super)g(root;)f(it)i(is)g(an)g
+(element)e(with)i(type)e Fq(T_super_root)p Fv(.)g(-)h(If)479
+2393 y(there)g(are)g(processing)f(instructions)g(outside)h(the)g(root)f
+(element)h(and)g(outside)f(the)i(DTD,)f(the)o(y)f(are)h(added)f(to)i
+(the)479 2501 y(super)f(root)f(instead)h(of)g(the)g(document.)e(-)j(If)
+f(f)o(alse,)g(the)g(super)g(root)g(node)f(is)i(not)f(created.)f(\(Def)o
+(ault:)h(f)o(alse\))p Black 396 2609 a Ft(\225)p Black
+60 w Fq(enable_comment_nodes:)p Fv(If)d(true,)i(the)i(parser)e(creates)
+h(nodes)g(for)f(comments)g(with)i(type)f Fq(T_comment)p
+Fv(;)f(if)479 2717 y(f)o(alse,)i(such)f(nodes)f(are)h(not)g(created.)f
+(\(Def)o(ault:)h(f)o(alse\))p Black 396 2825 a Ft(\225)p
+Black 60 w Fq(encoding:)p Fv(Speci\002es)f(the)i(internal)e(encoding)f
+(of)i(the)g(parser)-5 b(.)20 b(Most)g(strings)h(are)f(then)f
+(represented)g(according)479 2933 y(to)i(this)f(encoding;)f(ho)n(we)n
+(v)o(er)f(there)h(are)i(some)f(e)o(xceptions)e(\(especially)i
+Fq(ext_id)f Fv(v)n(alues)h(which)g(are)g(al)o(w)o(ays)479
+3041 y(UTF-8)g(encoded\).)e(\(Def)o(ault:)h(`Enc_iso88591\))p
+Black 396 3148 a Ft(\225)p Black 60 w Fq
+(recognize_standalone_declaration:)c Fv(If)21 b(true)e(and)h(if)h(the)f
+(parser)f(is)i(v)n(alidating,)e(the)479 3256 y Fq(standalone="yes")f
+Fv(declaration)h(forces)h(that)g(it)h(is)g(check)o(ed)e(whether)g(the)h
+(document)e(is)j(a)g(standalone)479 3364 y(document.)d(-)j(If)f(f)o
+(alse,)g(or)g(if)g(the)h(parser)e(is)i(in)g(well-formedness)d(mode,)h
+(such)h(declarations)f(are)h(ignored.)479 3472 y(\(Def)o(ault:)g
+(true\))p Black 396 3580 a Ft(\225)p Black 60 w Fq
+(store_element_positions:)d Fv(If)j(true,)g(for)f(e)n(v)o(ery)g
+(non-data)f(node)h(the)i(source)e(position)g(is)j(stored.)d(If)h(f)o
+(alse,)479 3688 y(the)g(position)g(information)e(is)j(lost.)f(If)g(a)n
+(v)n(ailable,)g(you)f(can)h(get)g(the)g(positions)g(of)g(nodes)f(by)h
+(in)m(v)n(oking)e(the)479 3796 y Fq(position)i Fv(method.)e(\(Def)o
+(ault:)i(true\))p Black 396 3904 a Ft(\225)p Black 60
+w Fq(idref_pass:)p Fv(If)e(true)i(and)g(if)g(there)g(is)h(an)f(ID)h
+(inde)o(x,)e(the)h(parser)f(checks)h(whether)f(e)n(v)o(ery)g(IDREF)i
+(or)e(IDREFS)479 4012 y(attrib)n(ute)h(refer)g(to)g(an)g(e)o(xisting)f
+(node;)h(this)g(requires)g(that)g(the)g(parser)g(tra)n(v)o(erses)g(the)
+g(whole)f(doument)g(tree.)h(If)479 4120 y(f)o(alse,)h(this)f(check)g
+(is)h(left)f(out.)g(\(Def)o(ault:)g(f)o(alse\))p Black
+396 4228 a Ft(\225)p Black 60 w Fq(validate_by_dfa:)p
+Fv(If)e(true)h(and)h(if)h(the)f(content)f(model)g(for)h(an)g(element)g
+(type)f(is)i(deterministic,)e(a)479 4336 y(deterministic)h(\002nite)g
+(automaton)e(is)j(used)f(to)h(v)n(alidate)e(whether)g(the)i(element)e
+(contents)h(match)f(the)i(content)479 4444 y(model)e(of)h(the)g(type.)g
+(If)g(f)o(alse,)g(or)g(if)g(a)g(DF)-6 b(A)21 b(is)g(not)f(a)n(v)n
+(ailable,)f(a)h(backtracking)e(algorithm)g(is)j(used)f(for)f(v)n
+(alidation.)479 4552 y(\(Def)o(ault:)h(true\))p Black
+396 4659 a Ft(\225)p Black 60 w Fq(accept_only_deterministic_models:)15
+b Fv(If)21 b(true,)e(only)h(deterministic)f(content)g(models)h(are)g
+(accepted;)f(if)479 4767 y(f)o(alse,)i(an)o(y)e(syntactically)h
+(correct)f(content)g(models)h(can)g(be)g(processed.)f(\(Def)o(ault:)g
+(true\))p Black 3800 5278 a Fr(92)p Black eop
+%%Page: 93 93
+93 92 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black -2 583 a Fp(4.4.4.)35
+b(Whic)o(h)f(con\002guration)g(should)g(I)f(use?)396
+751 y Fv(First,)21 b(I)f(recommend)e(to)i(v)n(ary)g(the)g(def)o(ault)f
+(con\002guration)f(instead)i(of)g(creating)f(a)i(ne)n(w)f
+(con\002guration)d(record.)i(F)o(or)396 859 y(instance,)h(to)g(set)h
+Fq(idref_pass)e Fv(to)i Fq(true)p Fv(,)e(change)g(the)i(def)o(ault)e
+(as)i(in:)396 1039 y Fq(let)45 b(config)e(=)i({)g(default_config)d
+(with)i(idref_pass)g(=)g(true)g(})396 1230 y Fv(The)20
+b(background)d(is)k(that)f(I)h(can)f(add)f(more)h(options)f(to)h(the)g
+(record)f(in)i(future)e(v)o(ersions)g(of)h(the)g(parser)f(without)396
+1338 y(breaking)g(your)f(programs.)396 1487 y Fu(Do)i(I)i(need)e(extra)
+f(nodes)i(f)n(or)f(pr)o(ocessing)g(instructions?)g Fv(By)g(def)o(ault,)
+g(such)g(nodes)f(are)h(not)g(created.)f(This)i(does)396
+1595 y(not)f(mean)g(that)g(the)g(processing)f(instructions)g(are)h
+(lost;)h(ho)n(we)n(v)o(er)m(,)d(you)h(cannot)g(\002nd)h(out)g(the)g(e)o
+(xact)g(location)f(where)396 1703 y(the)o(y)h(occur)-5
+b(.)19 b(F)o(or)h(e)o(xample,)e(the)j(follo)n(wing)d(XML)i(te)o(xt)396
+1883 y Fq(<x><?pi1?><y/><?pi2?></x>)396 2074 y Fv(will)h(normally)e
+(create)h(one)f(element)h(node)f(for)h Fq(x)g Fv(containing)e
+Fr(one)i Fv(subnode)f(for)g Fq(y)p Fv(.)h(The)g(processing)f
+(instructions)396 2182 y(are)h(attached)g(to)g Fq(x)h
+Fv(in)f(a)h(separate)e(hash)h(table;)h(you)e(can)h(access)h(them)e
+(using)h Fq(x)45 b(#)f(pinstr)g("pi1")20 b Fv(and)g Fq(x)44
+b(#)396 2290 y(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o(ely)-5
+b(.)18 b(The)i(information)d(is)k(lost)g(where)f(the)g(instructions)f
+(occur)g(within)h Fq(x)p Fv(.)396 2439 y(If)g(the)h(option)d
+Fq(enable_pinstr_nodes)g Fv(is)j(turned)e(on,)h(the)g(parser)f(creates)
+i(e)o(xtra)e(nodes)g Fq(pi1)i Fv(and)e Fq(pi2)i Fv(such)f(that)396
+2547 y(the)g(subnodes)f(of)h Fq(x)h Fv(are)f(no)n(w:)396
+2728 y Fq(x)45 b(#)g(sub_nodes)e(=)i([)f(pi1;)g(y;)h(pi2)f(])396
+2919 y Fv(The)20 b(e)o(xtra)g(nodes)f(contain)g(the)h(processing)f
+(instructions)g(in)i(the)f(usual)g(w)o(ay)-5 b(,)20 b(i.e.)g(you)f(can)
+h(access)h(them)f(using)f Fq(pi1)396 3026 y(#)45 b(pinstr)f("pi1")20
+b Fv(and)f Fq(pi2)45 b(#)f(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o
+(ely)-5 b(.)396 3176 y(Note)20 b(that)h(you)e(will)i(need)e(an)i(e)o(x)
+o(emplar)d(for)h(the)i(PI)f(nodes)g(\(see)g Fq(make_spec_from_alist)p
+Fv(\).)396 3325 y Fu(Do)g(I)i(need)e(a)h(super)g(r)o(oot)d(node?)i
+Fv(By)h(def)o(ault,)e(there)h(is)h(no)f(super)f(root)h(node.)f(The)h
+Fq(document)f Fv(object)h(refers)396 3433 y(directly)g(to)g(the)g(node)
+f(representing)f(the)j(root)e(element)h(of)g(the)g(document,)e(i.e.)396
+3613 y Fq(doc)45 b(#)f(root)g(=)h(r)396 3804 y Fv(if)21
+b Fq(r)f Fv(is)h(the)g(root)e(node.)g(This)h(is)i(sometimes)d(incon)m
+(v)o(enient:)f(\(1\))h(Some)h(algorithms)f(become)g(simpler)h(if)g(e)n
+(v)o(ery)f(node)396 3912 y(has)i(a)f(parent,)f(e)n(v)o(en)g(the)i(root)
+e(node.)g(\(2\))h(Some)g(standards)f(such)h(as)h(XP)o(ath)f(call)g(the)
+h("root)e(node")g(the)h(node)f(whose)396 4020 y(child)h(represents)f
+(the)i(root)e(of)h(the)g(document.)e(\(3\))i(The)g(super)f(root)h(node)
+f(can)h(serv)o(e)f(as)i(a)g(container)e(for)g(processing)396
+4128 y(instructions)g(outside)h(the)g(root)g(element.)f(Because)i(of)e
+(these)i(reasons,)e(it)i(is)g(possible)f(to)h(create)f(an)g(e)o(xtra)f
+(super)h(root)396 4236 y(node,)f(whose)h(child)g(is)h(the)f(root)g
+(node:)396 4416 y Fq(doc)45 b(#)f(root)g(=)h(sr)403 b(&&)396
+4513 y(sr)45 b(#)f(sub_nodes)g(=)g([)h(r)g(])396 4704
+y Fv(When)20 b(e)o(xtra)g(nodes)f(are)h(also)h(created)e(for)h
+(processing)f(instructions,)g(these)h(nodes)f(can)h(be)h(added)e(to)h
+(the)g(super)g(root)396 4812 y(node)f(if)h(the)o(y)e(occur)h(outside)g
+(the)g(root)g(element)g(\(reason)f(\(3\)\),)h(and)g(the)g(order)g
+(re\003ects)g(the)h(order)e(in)i(the)f(source)g(te)o(xt.)p
+Black 3800 5278 a Fr(93)p Black eop
+%%Page: 94 94
+94 93 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Note)g(that)h(you)e
+(will)i(need)e(an)i(e)o(x)o(emplar)d(for)h(the)i(super)e(root)h(node)f
+(\(see)h Fq(make_spec_from_alist)p Fv(\).)396 728 y Fu(What)g(is)h(the)
+g(effect)e(of)h(the)h(UTF-8)e(encoding?)h Fv(By)h(def)o(ault,)e(the)h
+(parser)g(represents)f(strings)h(\(with)g(fe)n(w)396
+836 y(e)o(xceptions\))e(as)j(ISO-8859-1)c(strings.)i(These)h(are)g
+(well-kno)n(wn,)d(and)j(there)f(are)h(tools)g(and)f(fonts)g(for)h(this)
+g(encoding.)396 986 y(Ho)n(we)n(v)o(er)m(,)e(internationalization)g
+(may)h(require)g(that)i(you)e(switch)h(o)o(v)o(er)f(to)i(UTF-8)e
+(encoding.)f(In)i(most)396 1094 y(en)m(vironments,)d(the)k(immediate)e
+(ef)n(fect)h(will)g(be)h(that)f(you)f(cannot)g(read)h(strings)g(with)g
+(character)f(codes)h(>=)h(160)e(an)o(y)396 1202 y(longer;)g(your)g
+(terminal)h(will)h(only)e(sho)n(w)h(funn)o(y)e(glyph)h(combinations.)f
+(It)i(is)h(strongly)e(recommended)e(to)k(install)396
+1310 y(Unicode)e(fonts)h(\(GNU)g(Unifont)f
+(\(http://czyborra.com/unifon)o(t/\),)c(Markus)k(K)o(uhn')-5
+b(s)19 b(fonts)396 1417 y(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/do)
+m(wnlo)o(ad/u)o(cs-fo)o(nts.tar)g(.g)o(z\)\))14 b(and)20
+b(terminal)f(emulators)h(that)g(can)g(handle)396 1525
+y(UTF-8)g(byte)g(sequences)f(\(http://myweb)m(.clark.net/pub/d)o(ick)o
+(e)o(y)o(/xter)o(m/x)o(term.)o(html\))o(.)c(Furthermore,)i(a)k(Unicode)
+396 1633 y(editor)f(may)f(be)i(helpful)e(\(such)g(as)i(Y)-9
+b(udit)20 b(\(ftp://metalab)m(.unc.edu/pub)o(/Linu)o(x/ap)o(ps/ed)o
+(itors/X/\)\))o(.)15 b(There)k(are)h(also)396 1741 y(F)-6
+b(A)h(Q)21 b(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/unico)o(de)o
+(.htm)o(l\))15 b(by)20 b(Markus)f(K)o(uhn.)396 1891 y(By)i(setting)f
+Fq(encoding)f Fv(to)i Fq(`Enc_utf8)e Fv(all)i(strings)f(originating)e
+(from)h(the)i(parsed)e(XML)h(document)e(are)396 1999
+y(represented)h(as)i(UTF-8)e(strings.)h(This)h(includes)e(not)h(only)f
+(character)g(data)h(and)g(attrib)n(ute)g(v)n(alues)g(b)n(ut)g(also)g
+(element)396 2107 y(names,)g(attrib)n(ute)g(names)g(and)f(so)i(on,)e
+(as)i(it)g(is)g(possible)f(to)h(use)f(an)o(y)f(Unicode)g(letter)i(to)f
+(form)f(such)h(names.)g(Strictly)396 2214 y(speaking,)f(PXP)i(is)g
+(only)e(XML-compliant)f(if)j(the)f(UTF-8)g(mode)f(is)i(used;)f
+(otherwise)g(it)h(will)g(ha)n(v)o(e)e(dif)n(\002culties)396
+2322 y(when)h(v)n(alidating)f(documents)f(containing)g
+(non-ISO-8859-1-names.)396 2472 y(This)j(mode)e(does)h(not)g(ha)n(v)o
+(e)f(an)o(y)h(impact)f(on)h(the)g(e)o(xternal)f(representation)f(of)i
+(documents.)f(The)g(character)g(set)396 2580 y(assumed)h(when)g
+(reading)e(a)j(document)d(is)j(set)g(in)g(the)f(XML)g(declaration,)e
+(and)i(character)f(set)i(when)e(writing)h(a)396 2688
+y(document)e(must)j(be)f(passed)g(to)g(the)g Fq(write)g
+Fv(method.)396 2837 y Fu(Ho)o(w)g(do)h(I)g(check)f(that)g(nodes)h
+(exist)f(which)h(ar)o(e)e(r)o(eferr)o(ed)g(by)i(IDREF)g(attrib)n(utes?)
+e Fv(First,)i(you)e(must)h(create)g(an)396 2945 y(inde)o(x)f(of)h(all)h
+(occurring)d(ID)i(attrib)n(utes:)396 3125 y Fq(let)45
+b(index)f(=)g(new)h(hash_index)396 3316 y Fv(This)21
+b(inde)o(x)e(must)h(be)g(passed)g(to)g(the)h(parsing)e(function:)396
+3496 y Fq(parse_document_entity)486 3593 y(~id_index:\(index)42
+b(:>)j(index\))486 3691 y(config)f(source)g(spec)396
+3882 y Fv(Ne)o(xt,)20 b(you)f(must)h(turn)g(on)g(the)g
+Fq(idref_pass)f Fv(mode:)396 4062 y Fq(let)45 b(config)e(=)i({)g
+(default_config)d(with)i(idref_pass)g(=)g(true)g(})396
+4253 y Fv(Note)20 b(that)h(no)n(w)e(the)i(whole)e(document)f(tree)j
+(will)g(be)f(tra)n(v)o(ersed,)f(and)g(e)n(v)o(ery)g(node)g(will)i(be)f
+(check)o(ed)f(for)h(IDREF)g(and)396 4361 y(IDREFS)h(attrib)n(utes.)f
+(If)g(the)g(tree)g(is)h(big,)f(this)h(may)f(tak)o(e)g(some)g(time.)396
+4510 y Fu(What)g(ar)o(e)g(deterministic)g(content)g(models?)g
+Fv(These)g(type)g(of)g(models)g(can)g(speed)f(up)h(the)g(v)n(alidation)
+f(checks;)396 4618 y(furthermore)f(the)o(y)h(ensure)g
+(SGML-compatibility)-5 b(.)18 b(In)i(particular)m(,)e(a)j(content)e
+(model)g(is)i(deterministic)e(if)i(the)f(parser)396 4726
+y(can)g(determine)f(the)h(actually)g(used)g(alternati)n(v)o(e)f(by)g
+(inspecting)g(only)h(the)g(current)f(tok)o(en.)g(F)o(or)h(e)o(xample,)e
+(this)396 4834 y(element)i(has)g(non-deterministic)e(contents:)p
+Black 3800 5278 a Fr(94)p Black eop
+%%Page: 95 95
+95 94 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fq(<!ELEMENT)44
+b(x)g(\(\(u,v\))g(|)h(\(u,y+\))f(|)g(v\)>)396 770 y Fv(If)20
+b(the)h(\002rst)f(element)g(in)g Fq(x)h Fv(is)g Fq(u)p
+Fv(,)f(the)h(parser)e(does)h(not)g(kno)n(w)f(which)h(of)g(the)g
+(alternati)n(v)o(es)f Fq(\(u,v\))h Fv(or)g Fq(\(u,y+\))g
+Fv(will)396 878 y(w)o(ork;)g(the)g(parser)g(must)g(also)g(inspect)g
+(the)h(second)e(element)g(to)i(be)f(able)g(to)g(distinguish)g(between)f
+(the)h(alternati)n(v)o(es.)396 986 y(Because)h(such)f(look-ahead)d
+(\(or)j("guessing"\))e(is)k(required,)c(this)i(e)o(xample)f(is)i
+(non-deterministic.)396 1135 y(The)f(XML)g(standard)f(demands)g(that)i
+(content)e(models)g(must)i(be)f(deterministic.)f(So)h(it)h(is)g
+(recommended)c(to)k(turn)e(the)396 1243 y(option)g Fq
+(accept_only_deterministic_models)d Fv(on;)j(ho)n(we)n(v)o(er)m(,)f
+(PXP)j(can)f(also)h(process)e(non-deterministic)396 1351
+y(models)h(using)g(a)g(backtracking)e(algorithm.)396
+1500 y(Deterministic)i(models)g(ensure)f(that)h(v)n(alidation)f(can)h
+(be)g(performed)e(in)i(linear)g(time.)g(In)g(order)f(to)h(get)g(the)396
+1608 y(maximum)f(bene\002ts,)h(PXP)h(also)f(implements)f(a)i(special)f
+(v)n(alidator)f(that)h(pro\002ts)g(from)f(deterministic)h(models;)f
+(this)396 1716 y(is)i(the)g(deterministic)e(\002nite)h(automaton)f
+(\(DF)-6 b(A\).)19 b(This)i(v)n(alidator)d(is)k(enabled)d(per)g
+(element)h(type)g(if)g(the)g(element)396 1824 y(type)g(has)g(a)h
+(deterministic)e(model)h(and)f(if)i(the)f(option)f Fq(validate_by_dfa)f
+Fv(is)j(turned)e(on.)396 1974 y(In)h(general,)f(I)h(e)o(xpect)g(that)g
+(the)g(DF)-6 b(A)21 b(method)e(is)i(f)o(aster)f(than)g(the)g
+(backtracking)e(method;)g(especially)i(in)h(the)f(w)o(orst)396
+2082 y(case)h(the)f(DF)-6 b(A)21 b(tak)o(es)f(only)g(linear)f(time.)i
+(Ho)n(we)n(v)o(er)m(,)d(if)i(the)g(content)g(model)f(has)h(only)g(fe)n
+(w)g(alternati)n(v)o(es)f(and)h(the)396 2190 y(alternati)n(v)o(es)f(do)
+h(not)g(nest,)g(the)h(backtracking)c(algorithm)i(may)g(be)i(better)-5
+b(.)-2 2691 y Fx(4.5.)39 b(Updates)396 2871 y Fr(Some)20
+b(\(often)f(later)i(added\))d(featur)m(es)i(that)g(ar)m(e)h(otherwise)f
+(not)g(e)n(xplained)f(in)h(the)h(manual)d(b)n(ut)j(worth)f(to)g(be)396
+2979 y(mentioned.)p Black 396 3211 a Ft(\225)p Black
+60 w Fv(Methods)g(node_position,)d(node_path,)g(nth_node,)h(pre)n
+(vious_node,)e(ne)o(xt_node)h(for)j(nodes:)f(See)479
+3319 y(pxp_document.mli)p Black 396 3427 a Ft(\225)p
+Black 60 w Fv(Functions)h(to)g(determine)f(the)h(document)e(order)h(of)
+h(nodes:)f(compare,)g(create_ord_inde)o(x,)c(ord_number)m(,)479
+3535 y(ord_compare:)i(See)k(pxp_document.mli)p Black
+3800 5278 a Fr(95)p Black eop
+%%Page: 96 96
+96 95 bop Black Black Black Black eop
+%%Trailer
+end
+userdict /end-hook known{end-hook}if
+%%EOF
--- /dev/null
+<!ENTITY markup-dtd1.mli '
+
+(**********************************************************************)
+(* *)
+(* Pxp_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ (* Creation:
+ * new dtd
+ * creates a new, empty DTD object without any declaration, without a root
+ * element, without an ID.
+ *)
+ Pxp_types.collect_warnings ->
+ Pxp_types.rep_encoding ->
+ object
+ method root : string option
+ (* get the name of the root element if present *)
+
+ method set_root : string -> unit
+ (* set the name of the root element. This method can be invoked
+ * only once
+ *)
+
+ method id : Pxp_types.dtd_id option
+ (* get the identifier for this DTD *)
+
+ method set_id : Pxp_types.dtd_id -> unit
+ (* set the identifier. This method can be invoked only once *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding used for character representation *)
+
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - elements and notations that have not been added may be used in an
+ * arbitrary way; the methods "element" and "notation" indicate this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary contents are allowed or not. *)
+
+ method standalone_declaration : bool
+ (* Whether there is a 'standalone' declaration or not. Strictly
+ * speaking, this declaration is not part of the DTD, but it is
+ * included here because of practical reasons.
+ * If not set, this property defaults to 'false'.
+ *)
+
+ method set_standalone_declaration : bool -> unit
+ (* Sets the 'standalone' declaration. *)
+
+
+ method add_element : dtd_element -> unit
+ (* add the given element declaration to this DTD. Raises Not_found
+ * if there is already an element declaration with the same name.
+ *)
+
+ method add_gen_entity : Pxp_entity.entity -> bool -> unit
+ (* add_gen_entity e extdecl:
+ * add the entity 'e' as general entity to this DTD (general entities
+ * are those represented by &name;). If there is already a declaration
+ * with the same name, the second definition is ignored; as exception from
+ * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+ * may only be redeclared with a definition that is equivalent to the
+ * standard definition; otherwise a Validation_error is raised.
+ *
+ * 'extdecl': 'true' indicates that the entity declaration occurs in
+ * an external entity. (Used for the standalone check.)
+ *)
+
+ method add_par_entity : Pxp_entity.entity -> unit
+ (* add the given entity as parameter entity to this DTD (parameter
+ * entities are those represented by &percent;name;). If there is already a
+ * declaration with the same name, the second definition is ignored.
+ *)
+
+ method add_notation : dtd_notation -> unit
+ (* add the given notation to this DTD. If there is already a declaration
+ * with the same name, a Validation_error is raised.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* add the given processing instruction to this DTD. *)
+
+ method element : string -> dtd_element
+ (* looks up the element declaration with the given name. Raises
+ * Validation_error if the element cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method element_names : string list
+ (* returns the list of the names of all element declarations. *)
+
+ method gen_entity : string -> (Pxp_entity.entity * bool)
+ (* let e, extdecl = obj # gen_entity n:
+ * looks up the general entity 'e' with the name 'n'. Raises
+ * WF_error if the entity cannot be found.
+ * 'extdecl': indicates whether the entity declaration occurred in an
+ * external entity.
+ *)
+
+ method gen_entity_names : string list
+ (* returns the list of all general entity names *)
+
+ method par_entity : string -> Pxp_entity.entity
+ (* looks up the parameter entity with the given name. Raises
+ * WF_error if the entity cannot be found.
+ *)
+
+ method par_entity_names : string list
+ (* returns the list of all parameter entity names *)
+
+ method notation : string -> dtd_notation
+ (* looks up the notation declaration with the given name. Raises
+ * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method notation_names : string list
+ (* Returns the list of the names of all added notations *)
+
+ method pinstr : string -> proc_instruction list
+ (* looks up all processing instructions with the given target.
+ * The "target" is the identifier following "<?".
+ * Note: It is not possible to find out the exact position of the
+ * processing instruction.
+ *)
+
+ method pinstr_names : string list
+ (* Returns the list of the names (targets) of all added pinstrs *)
+
+ method validate : unit
+ (* ensures that the DTD is valid. This method is optimized such that
+ * actual validation is only performed if DTD has changed.
+ * If the DTD is invalid, mostly a Validation_error is raised,
+ * but other exceptions are possible, too.
+ *)
+
+ method only_deterministic_models : unit
+ (* Succeeds if all regexp content models are deterministic.
+ * Otherwise Validation_error.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+ (* write os enc doctype:
+ * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
+ * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+ * only the declarations are written (the material within the
+ * square brackets).
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (*----------------------------------------*)
+ method invalidate : unit
+ (* INTERNAL METHOD *)
+ method warner : Pxp_types.collect_warnings
+ (* INTERNAL METHOD *)
+ end
+
+'>
+<!ENTITY markup-dtd2.mli '
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string ->
+ (* Creation:
+ * new dtd_element init_dtd init_name:
+ * creates a new dtd_element object for init_dtd with init_name.
+ * The strings are represented in the same encoding as init_dtd.
+ *)
+ object
+
+ method name : string
+ (* returns the name of the declared element *)
+
+ method externally_declared : bool
+ (* returns whether the element declaration occurs in an external
+ * entity.
+ *)
+
+ method content_model : Pxp_types.content_model_type
+ (* get the content model of this element declaration, or Unspecified *)
+
+ method content_dfa : Pxp_dfa.dfa_definition option
+ (* return the DFA of the content model if there is a DFA, or None.
+ * A DFA exists only for regexp style content models which are
+ * deterministic.
+ *)
+
+ method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+ (* set_cm_and_extdecl cm extdecl:
+ * set the content model to 'cm'. Once the content model is not
+ * Unspecified, it cannot be set to a different value again.
+ * Furthermore, it is set whether the element occurs in an external
+ * entity ('extdecl').
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Return the encoding of the strings *)
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - attributes that have not been added may be used in an
+ * arbitrary way; the method "attribute" indicates this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary attributes are allowed or not. *)
+
+ method attribute : string ->
+ Pxp_types.att_type * Pxp_types.att_default
+ (* get the type and default value of a declared attribute, or raise
+ * Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_violates_standalone_declaration :
+ string -> string option -> bool
+ (* attribute_violates_standalone_declaration name v:
+ * Checks whether the attribute 'name' violates the "standalone"
+ * declaration if it has value 'v'.
+ * The method returns true if:
+ * - The attribute declaration occurs in an external entity,
+ * and if one of the two conditions holds:
+ * - v = None, and there is a default for the attribute value
+ * - v = Some s, and the type of the attribute is not CDATA,
+ * and s changes if normalized according to the rules of the
+ * attribute type.
+ *
+ * The method raises Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_names : string list
+ (* get the list of all declared attributes *)
+
+ method names_of_required_attributes : string list
+ (* get the list of all attributes that are specified as required
+ * attributes
+ *)
+
+ method id_attribute_name : string option
+ (* Returns the name of the attribute with type ID, or None. *)
+
+ method idref_attribute_names : string list
+ (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+ method add_attribute : string ->
+ Pxp_types.att_type ->
+ Pxp_types.att_default ->
+ bool ->
+ unit
+ (* add_attribute name type default extdecl:
+ * add an attribute declaration for an attribute with the given name,
+ * type, and default value. If there is more than one declaration for
+ * an attribute name, the first declaration counts; the other declarations
+ * are ignored.
+ * 'extdecl': if true, the attribute declaration occurs in an external
+ * entity. This property is used to check the "standalone" attribute.
+ *)
+
+ method validate : unit
+ (* checks whether this element declaration (i.e. the content model and
+ * all attribute declarations) is valid for the associated DTD.
+ * Raises mostly Validation_error if the validation fails.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new dtd_notation a_name an_external_ID init_encoding
+ * creates a new dtd_notation object with the given name and the given
+ * external ID.
+ *)
+ object
+ method name : string
+ method ext_id : Pxp_types.ext_id
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
+ * string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new proc_instruction a_target a_value init_encoding
+ * creates a new proc_instruction object with the given target string and
+ * the given value string.
+ * Note: A processing instruction is written as <?target value?>.
+ *)
+ object
+ method target : string
+ method value : string
+ method encoding : Pxp_types.rep_encoding
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+ method parse_pxp_option : (string * string * (string * string) list)
+ (* Parses a PI containing a PXP option. Such PIs are formed like:
+ * <?target option-name option-att="value" option-att="value" ... ?>
+ * The method returns a triple
+ * (target, option-name, [option-att, value; ...])
+ * or raises Error.
+ *)
+
+ end
+
+;;
+
+'>
--- /dev/null
+#! /bin/sh
+# (*
+exec ocamlfattop "$0"
+*) directory ".";;
+
+open Str;;
+
+(* Recognizes a marker line of the form (*$ NAME *); the main loop anchors
+ * the match at the first character of the line. Group 1 is NAME, which
+ * consists of letters, digits, dots and dashes and may be empty.
+ *)
+let name_re = regexp "(\\*\\$[ \t]*\\([a-zA-Z0-9.-]*\\)[ \t]*\\*)";;
+(* The single characters that the main loop substitutes in copied lines. *)
+let subst_re = regexp "[<>&'%]";;
+
+(* Returns the opening part of the declaration of the entity called name,
+ * up to and including the quote that starts the entity literal.
+ *)
+let begin_entity name =
+  String.concat "" [ "<!ENTITY "; name; " '" ];;
+
+(* Returns the text that terminates an entity declaration: the closing
+ * quote of the literal, the closing bracket, and a newline.
+ *)
+let end_entity () =
+  let terminator = "'>\n" in
+  terminator
+;;
+
+
+(* Main program.  Reads the annotated source from stdin and prints the
+ * generated entity declarations to stdout once the input is exhausted.
+ *
+ * - A marker line (matched by name_re) closes the entity that is currently
+ *   open, if any, and then opens a new one named after the marker; the
+ *   special name - only closes.
+ * - Every other line is copied into the open entity with its special
+ *   characters escaped; lines outside of an entity are dropped.
+ *)
+
+(* The output is collected in a buffer; appending to a string with ^ for
+ * every input line would copy the whole text each time.
+ *)
+let out = Buffer.create 4096 in
+let within_entity = ref false in
+
+(* Name of the entity that stands for a character matched by subst_re.
+ * The copied text ends up inside a single-quoted entity literal, so each of
+ * these characters has to be written as a reference and must not be copied
+ * verbatim.  apos and percent are not predefined; the including document
+ * has to declare them.
+ *)
+let entity_name_of = function
+    "<" -> "lt"
+  | ">" -> "gt"
+  | "&" -> "amp"
+  | "'" -> "apos"
+  | "%" -> "percent"
+  | _   -> assert false      (* subst_re matches nothing else *)
+in
+
+(* Replaces every special character of line by its entity reference. *)
+let escape line =
+  global_substitute
+    subst_re
+    (fun s -> "&" ^ entity_name_of (matched_group 0 s) ^ ";")
+    line
+in
+
+(* Terminates the open entity declaration, if there is one. *)
+let close_entity () =
+  if !within_entity then begin
+    Buffer.add_string out "\n";
+    Buffer.add_string out (end_entity());
+    within_entity := false
+  end
+in
+
+try
+  while true do
+    let line = read_line() in
+    if string_match name_re line 0 then begin
+      (* matched_group must be read before any other regexp operation *)
+      let name = matched_group 1 line in
+      close_entity();
+      if name <> "-" then begin
+        Buffer.add_string out (begin_entity name);
+        within_entity := true
+      end
+    end
+    else if !within_entity then begin
+      Buffer.add_string out "\n";
+      Buffer.add_string out (escape line)
+    end
+  done
+with End_of_file ->
+  (* End of input: an entity that is still open must be terminated. *)
+  close_entity();
+  print_string (Buffer.contents out)
+;;
--- /dev/null
+.acronym {
+ font-weight: bold;
+ color: #c71585
+}
--- /dev/null
+<!DOCTYPE style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN" [
+
+<!-- The default is the print stylesheet. Call 'jade' with option '-ihtml'
+ to select the HTML stylesheet.
+ -->
+
+<!ENTITY % html "IGNORE">
+<![%html;[
+<!ENTITY % print "IGNORE">
+<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
+]]>
+<!ENTITY % print "INCLUDE">
+<![%print;[
+<!ENTITY docbook.dsl SYSTEM "docbook.dsl" CDATA dsssl>
+]]>
+]>
+<style-sheet>
+<style-specification use="docbook">
+<style-specification-body>
+
+;; HTML:
+
+<![%html;[
+
+(define %footnotes-at-end%
+ ;; Should footnotes appear at the end of HTML pages?
+ #t)
+
+(define %html-ext%
+ ;; Default extension for HTML output files
+ ".html")
+
+(define %root-filename%
+ ;; Name for the root HTML document
+ "index")
+
+(define %css-decoration%
+ ;; Enable CSS decoration of elements
+ #t)
+
+(define %stylesheet%
+ ;; Name of the stylesheet to use
+ "markup.css")
+
+(define %graphic-default-extension%
+ ;; Default extension for graphic FILEREFs
+ "gif")
+
+]]>
+
+;; printing:
+
+<![%print;[
+
+(define bop-footnotes
+ ;; Make "bottom-of-page" footnotes?
+ #t)
+
+(define %graphic-default-extension%
+ ;; Default extension for graphic FILEREFs
+ "ps")
+
+]]>
+
+;; both:
+
+(define %section-autolabel%
+ ;; Are sections enumerated?
+ #t)
+
+</style-specification-body>
+</style-specification>
+<external-specification id="docbook" document="docbook.dsl">
+</style-sheet>
--- /dev/null
+<!DOCTYPE book PUBLIC "-//Davenport//DTD DocBook V3.0//EN" [
+<!ENTITY markup "<acronym>PXP</acronym>">
+<!ENTITY pxp "<acronym>PXP</acronym>">
+<!ENTITY % readme.code.to-html SYSTEM "readme.ent">
+<!ENTITY apos "'">
+<!ENTITY percent "%">
+<!ENTITY % get.markup-yacc.mli SYSTEM "yacc.mli.ent">
+<!ENTITY % get.markup-dtd.mli SYSTEM "dtd.mli.ent">
+%readme.code.to-html;
+%get.markup-yacc.mli;
+%get.markup-dtd.mli;
+
+<!ENTITY fun "->"> <!-- function type operator -->
+
+]>
+
+
+<book>
+
+ <title>The PXP user's guide</title>
+ <bookinfo>
+ <!-- <bookbiblio> -->
+ <authorgroup>
+ <author>
+ <firstname>Gerd</firstname>
+ <surname>Stolpmann</surname>
+ <authorblurb>
+ <para>
+ <address>
+ <email>gerd@gerd-stolpmann.de</email>
+ </address>
+ </para>
+ </authorblurb>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>1999, 2000</year><holder>Gerd Stolpmann</holder>
+ </copyright>
+ <!-- </bookbiblio> -->
+
+ <abstract>
+ <para>
+&markup; is a validating parser for XML-1.0 which has been
+written entirely in Objective Caml.
+</para>
+ <formalpara>
+ <title>Download &markup;: </title>
+ <para>
+The free &markup; library can be downloaded at
+<ulink URL="http://www.ocaml-programming.de/packages/">
+http://www.ocaml-programming.de/packages/
+</ulink>. This user's guide is included.
+Newest releases of &markup; will be announced in
+<ulink URL="http://www.npc.de/ocaml/linkdb/">The OCaml Link
+Database</ulink>.
+</para>
+ </formalpara>
+ </abstract>
+
+ <legalnotice>
+ <title>License</title>
+ <para>
+This document, and the described software, "&markup;", are copyright by
+Gerd Stolpmann.
+</para>
+
+<para>
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this document and the "&markup;" software (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+</para>
+ <para>
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+</para>
+ <para>
+The Software is provided ``as is'', without warranty of any kind, express
+or implied, including but not limited to the warranties of
+merchantability, fitness for a particular purpose and noninfringement.
+In no event shall Gerd Stolpmann be liable for any claim, damages or
+other liability, whether in an action of contract, tort or otherwise,
+arising from, out of or in connection with the Software or the use or
+other dealings in the software.
+</para>
+ </legalnotice>
+
+ </bookinfo>
+
+
+<!-- ********************************************************************** -->
+
+ <part>
+ <title>User's guide</title>
+
+ <chapter>
+ <title>What is XML?</title>
+
+ <sect1>
+ <title>Introduction</title>
+
+ <para>XML (short for <emphasis>Extensible Markup Language</emphasis>)
+generalizes the idea that text documents are typically structured in sections,
+sub-sections, paragraphs, and so on. The format of the document is not fixed
+(as, for example, in HTML), but can be declared by a so-called DTD (document
+type definition). The DTD describes only the rules how the document can be
+structured, but not how the document can be processed. For example, if you want
+to publish a book that uses XML markup, you will need a processor that converts
+the XML file into a printable format such as Postscript. On the one hand, the
+structure of XML documents is configurable; on the other hand, there is no
+longer a canonical interpretation of the elements of the document; for example
+one XML DTD might require that paragraphs be delimited by
+<literal>para</literal> tags, and another DTD expects <literal>p</literal> tags
+for the same purpose. As a result, for every DTD a new processor is required.
+</para>
+
+ <para>
+Although XML can be used to express structured text documents it is not limited
+to this kind of application. For example, XML can also be used to exchange
+structured data over a network, or to simply store structured data in
+files. Note that XML documents cannot contain arbitrary binary data because
+some characters are forbidden; for some applications you need to encode binary
+data as text (e.g. the base 64 encoding).
+</para>
+
+
+ <sect2>
+ <title>The "hello world" example</title>
+ <para>
+The following example shows a very simple DTD, and a corresponding document
+instance. The document is structured such that it consists of sections, and
+that sections consist of paragraphs, and that paragraphs contain plain text:
+</para>
+
+ <programlisting>
+<![CDATA[<!ELEMENT document (section)+>
+<!ELEMENT section (paragraph)+>
+<!ELEMENT paragraph (#PCDATA)>
+]]>
+</programlisting>
+
+ <para>The following document is an instance of this DTD:</para>
+
+ <programlisting>
+<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE document SYSTEM "simple.dtd">
+<document>
+ <section>
+ <paragraph>This is a paragraph of the first section.</paragraph>
+ <paragraph>This is another paragraph of the first section.</paragraph>
+ </section>
+ <section>
+ <paragraph>This is the only paragraph of the second section.</paragraph>
+ </section>
+</document>
+]]>
+</programlisting>
+
+ <para>As in HTML (and, of course, in grand-father SGML), the "pieces" of
+the document are delimited by element braces, i.e. such a piece begins with
+<literal>&lt;name-of-the-type-of-the-piece&gt;</literal> and ends with
+<literal>&lt;/name-of-the-type-of-the-piece&gt;</literal>, and the pieces are
+called <emphasis>elements</emphasis>. Unlike HTML and SGML, both start tags and
+end tags (i.e. the delimiters written in angle brackets) can never be left
+out. For example, HTML calls the paragraphs simply <literal>p</literal>, and
+because paragraphs never contain paragraphs, a sequence of several paragraphs
+can be written as:
+
+<programlisting><![CDATA[<p>First paragraph
+<p>Second paragraph]]></programlisting>
+
+This is not possible in XML; continuing our example above we must always write
+
+<programlisting><![CDATA[<paragraph>First paragraph</paragraph>
+<paragraph>Second paragraph</paragraph>]]></programlisting>
+
+The rationale behind that is to (1) simplify the development of XML parsers
+(you need not convert the DTD into a deterministic finite automaton which is
+required to detect omitted tags), and to (2) make it possible to parse the
+document independent of whether the DTD is known or not.
+</para>
+
+<para>
+The first line of our sample document,
+
+<programlisting>
+<![CDATA[<?xml version="1.0" encoding="ISO-8859-1"?>]]>
+</programlisting>
+
+is the so-called <emphasis>XML declaration</emphasis>. It expresses that the
+document follows the conventions of XML version 1.0, and that the document is
+encoded using characters from the ISO-8859-1 character set (often known as
+"Latin 1", mostly used in Western Europe). Although the XML declaration is not
+mandatory, it is good style to include it; everybody sees at first glance
+that the document uses XML markup and not the similar-looking HTML and SGML
+markup languages. If you omit the XML declaration, the parser will assume
+that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes
+it possible to distinguish between UTF-8 and UTF-16 automatically); these
+are encodings of Unicode's universal character set. (Note that &pxp;, unlike its
+predecessor "Markup", fully supports Unicode.)
+</para>
+
+<para>
+The second line,
+
+<programlisting>
+<![CDATA[<!DOCTYPE document SYSTEM "simple.dtd">]]>
+</programlisting>
+
+names the DTD that is going to be used for the rest of the document. In
+general, it is possible that the DTD consists of two parts, the so-called
+external and the internal subset. "External" means that the DTD exists as a
+second file; "internal" means that the DTD is included in the same file. In
+this example, there is only an external subset, and the system identifier
+"simple.dtd" specifies where the DTD file can be found. System identifiers are
+interpreted as URLs; for instance this would be legal:
+
+<programlisting>
+<![CDATA[<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">]]>
+</programlisting>
+
+Please note that &pxp; cannot interpret HTTP identifiers by default, but it is
+possible to change the interpretation of system identifiers.
+</para>
+
+ <para>
+The word immediately following <literal>DOCTYPE</literal> determines which of
+the declared element types (here "document", "section", and "paragraph") is
+used for the outermost element, the <emphasis>root element</emphasis>. In this
+example it is <literal>document</literal> because the outermost element is
+delimited by <literal><document></literal> and
+<literal></document></literal>.
+</para>
+
+ <para>
+The DTD consists of three declarations for element types:
+<literal>document</literal>, <literal>section</literal>, and
+<literal>paragraph</literal>. Such a declaration has two parts:
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> <replaceable>content-model</replaceable>>
+</programlisting>
+
+The content model is a regular expression which describes the possible inner
+structure of the element. Here, <literal>document</literal> contains one or
+more sections, and a <literal>section</literal> contains one or more
+paragraphs. Note that these two element types are not allowed to contain
+arbitrary text. Only the <literal>paragraph</literal> element type is declared
+such that parsed character data (indicated by the symbol
+<literal>#PCDATA</literal>) is permitted.
+</para>
+
+ <para>
+See below for a detailed discussion of content models.
+</para>
+ </sect2>
+
+ <sect2>
+ <title>XML parsers and processors</title>
+ <para>
+XML documents are human-readable, but this is not the main purpose of this
+language. XML has been designed such that documents can be read by a program
+called an <emphasis>XML parser</emphasis>. The parser checks that the document
+is well-formatted, and it represents the document as objects of the programming
+language. There are two aspects when checking the document: First, the document
+must follow some basic syntactic rules, such as that tags are written in angle
+brackets, that for every start tag there must be a corresponding end tag and so
+on. A document respecting these rules is
+<emphasis>well-formed</emphasis>. Second, the document must match the DTD in
+which case the document is <emphasis>valid</emphasis>. Many parsers check only
+for well-formedness and ignore the DTD; &pxp; is designed such that it can
+even validate the document.
+</para>
+
+ <para>
+A parser does not make a sensible application, it only reads XML
+documents. The whole application working with XML-formatted data is called an
+<emphasis>XML processor</emphasis>. Often XML processors convert documents into
+another format, such as HTML or PostScript. Sometimes processors extract data
+from the documents and output the processed data again in XML format. The parser
+can help the application processing the document; for example it can provide
+means to access the document in a specific manner. In particular, &pxp;
+supports an object-oriented access layer.
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Discussion</title>
+ <para>
+As we have seen, there are two levels of description: On the one hand, XML can
+define rules about the format of a document (the DTD), on the other hand, XML
+expresses structured documents. There are a number of possible applications:
+</para>
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+XML can be used to express structured texts. Unlike HTML, there is no canonical
+interpretation; one would have to write a backend for the DTD that translates
+the structured texts into a format that existing browsers, printers
+etc. understand. The advantage of a self-defined document format is that it is
+possible to design the format in a more problem-oriented way. For example, if
+the task is to extract reports from a database, one can use a DTD that reflects
+the structure of the report or the database. A possible approach would be to
+have an element type for every database table and for every column. Once the
+DTD has been designed, the report procedure can be split up into a part that
+selects the database rows and outputs them as an XML document according to the
+DTD, and a part that translates the document into other formats. Of course,
+the latter part can be solved in a generic way, e.g. there may be configurable
+backends for all DTDs that follow the approach and have element types for
+tables and columns.
+</para>
+
+ <para>
+XML plays the role of a configurable intermediate format. The database
+extraction function can be written without having to know the details of
+typesetting; the backends can be written without having to know the details of
+the database.
+</para>
+
+ <para>
+Of course, there are traditional solutions. One can define an ad hoc
+intermediate text file format. The disadvantage is that there are no names for
+the pieces of the format, and that such formats usually lack documentation
+because of this. Another solution would be to have a binary representation,
+either as a language-dependent or a language-independent structure (examples of
+the latter can be found in RPC implementations). The disadvantage is that it is
+harder to view such representations, one has to write pretty printers for this
+purpose. It is also more difficult to enter test data; XML is plain text that
+can be written using an arbitrary editor (Emacs even has a good XML mode,
+PSGML). All these alternatives suffer from a missing structure checker,
+i.e. the programs processing these formats usually do not check the input file
+or input object in detail; XML parsers check the syntax of the input (the
+so-called well-formedness check), and the advanced parsers like &markup; even
+verify that the structure matches the DTD (the so-called validation).
+</para>
+
+ </listitem>
+
+ <listitem>
+ <para>
+XML can be used as a configurable communication language. A fundamental problem
+of every communication is that sender and receiver must follow the same
+conventions about the language. For data exchange, the question is usually
+which data records and fields are available, how they are syntactically
+composed, and which values are possible for the various fields. Similar
+questions arise for text document exchange. XML does not answer these problems
+completely, but it reduces the number of ambiguities for such conventions: The
+outlines of the syntax are specified by the DTD (but not necessarily the
+details), and XML introduces canonical names for the components of documents
+such that it is simpler to describe the rest of the syntax and the semantics
+informally.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+XML is a data storage format. Currently, every software product tends to use
+its own way to store data; commercial software often does not describe such
+formats, and it is a pain to integrate such software into a bigger project.
+XML can help to improve this situation when several applications share the same
+syntax of data files. DTDs are then neutral instances that check the format of
+data files independent of applications.
+</para>
+ </listitem>
+
+ </itemizedlist>
+ </sect2>
+ </sect1>
+
+
+ <!-- ================================================== -->
+
+
+ <sect1>
+ <title>Highlights of XML</title>
+
+ <para>
+This section explains many of the features of XML, but not all, and some
+features not in detail. For a complete description, see the <ulink
+url="http://www.w3.org/TR/1998/REC-xml-19980210.html">XML
+specification</ulink>.
+</para>
+
+ <sect2>
+ <title>The DTD and the instance</title>
+ <para>
+The DTD contains various declarations; in general you can only use a feature if
+you have previously declared it. The document instance file may contain the
+full DTD, but it is also possible to split the DTD into an internal and an
+external subset. A document must begin as follows if the full DTD is included:
+
+<programlisting>
+<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
+<!DOCTYPE <replaceable>root</replaceable> [
+ <replaceable>Declarations</replaceable>
+]>
+</programlisting>
+
+These declarations are called the <emphasis>internal subset</emphasis>. Note
+that the usage of entities and conditional sections is restricted within the
+internal subset.
+</para>
+ <para>
+If the declarations are located in a different file, you can refer to this file
+as follows:
+
+<programlisting>
+<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
+<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>">
+</programlisting>
+
+The declarations in the file are called the <emphasis>external
+subset</emphasis>. The file name is called the <emphasis>system
+identifier</emphasis>.
+It is also possible to refer to the file by a so-called
+<emphasis>public identifier</emphasis>, but most XML applications won't use
+this feature.
+</para>
+ <para>
+You can also specify both internal and external subsets. In this case, the
+declarations of both subsets are mixed, and if there are conflicts, the
+declarations of the internal subset override those of the external subset with
+the same name. This looks as follows:
+
+<programlisting>
+<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
+<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>" [
+ <replaceable>Declarations</replaceable>
+]>
+</programlisting>
+</para>
+
+ <para>
+The XML declaration (the string beginning with <literal><?xml</literal> and
+ending at <literal>?></literal>) should specify the encoding of the
+file. Common values are UTF-8, and the ISO-8859 series of character sets. Note
+that every file parsed by the XML processor can begin with an XML declaration
+and that every file may have its own encoding.
+</para>
+
+ <para>
+The name of the root element must be mentioned directly after the
+<literal>DOCTYPE</literal> string. This means that a full document instance
+looks like
+
+<programlisting>
+<?xml version="1.0" encoding="<replaceable>Your encoding</replaceable>"?>
+<!DOCTYPE <replaceable>root</replaceable> SYSTEM "<replaceable>file name</replaceable>" [
+ <replaceable>Declarations</replaceable>
+]>
+
+<<replaceable>root</replaceable>>
+ <replaceable>inner contents</replaceable>
+</<replaceable>root</replaceable>>
+</programlisting>
+</para>
+ </sect2>
+
+ <!-- ======================================== -->
+
+ <sect2>
+ <title>Reserved characters</title>
+ <para>
+Some characters are generally reserved to indicate markup such that they cannot
+be used for character data. These characters are <, >, and
+&. Furthermore, single and double quotes are sometimes reserved. If you
+want to include such a character as character data, write it as follows:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>&lt;</literal> instead of <
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>&gt;</literal> instead of >
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>&amp;</literal> instead of &
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>&apos;</literal> instead of '
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>&quot;</literal> instead of "
+</para>
+ </listitem>
+ </itemizedlist>
+
+All other characters are free in the document instance. It is possible to
+include a character by its position in the Unicode alphabet:
+
+<programlisting>
+&#<replaceable>n</replaceable>;
+</programlisting>
+
+where <replaceable>n</replaceable> is the decimal number of the
+character. Alternatively, you can specify the character by its hexadecimal
+number:
+
+<programlisting>
+&#x<replaceable>n</replaceable>;
+</programlisting>
+
+In the scope of declarations, the character % is no longer free. To include it
+as a character, you must use the notation <literal>&#37;</literal> or
+<literal>&#x25;</literal>.
+</para>
+
+ <para>Note that besides &lt;, &gt;, &amp;,
+&apos;, and &quot; there are no predefined character entities. This is
+different from HTML which defines a list of characters that can be referenced
+by name (e.g. &auml; for ä); however, if you prefer named characters, you
+can declare such entities yourself (see below).</para>
+ </sect2>
+
+
+ <!-- ======================================== -->
+
+ <sect2>
+ <title>Elements and ELEMENT declarations</title>
+
+ <para>
+Elements structure the document instance in a hierarchical way. There is a
+top-level element, the <emphasis>root element</emphasis>, which contains a
+sequence of inner elements and character sections. The inner elements are
+structured in the same way. Every element has an <emphasis>element
+type</emphasis>. The beginning of the element is indicated by a <emphasis>start
+tag</emphasis>, written
+
+<programlisting>
+<<replaceable>element-type</replaceable>>
+</programlisting>
+
+and the element continues until the corresponding <emphasis>end tag</emphasis>
+is reached:
+
+<programlisting>
+</<replaceable>element-type</replaceable>>
+</programlisting>
+
+In XML, it is not allowed to omit start or end tags, even if the DTD would
+permit this. Note that there are no special rules how to interpret spaces or
+newlines near start or end tags; all spaces and newlines count.
+</para>
+
+ <para>
+Every element type must be declared before it can be used. The declaration
+consists of two parts: the ELEMENT declaration describes the content model,
+i.e. which inner elements are allowed; the ATTLIST declaration describes the
+attributes of the element.
+</para>
+
+ <para>
+An element can simply allow everything as content. This is written:
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> ANY>
+</programlisting>
+
+Conversely, an element can be forced to be empty; this is declared by:
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> EMPTY>
+</programlisting>
+
+Note that there is an abbreviated notation for empty element instances:
+<literal><<replaceable>name</replaceable>/></literal>.
+</para>
+
+ <para>
+There are two more sophisticated forms of declarations: so-called
+<emphasis>mixed declarations</emphasis>, and <emphasis>regular
+expressions</emphasis>. An element with mixed content contains character data
+interspersed with inner elements, and the set of allowed inner elements can be
+specified. In contrast to this, a regular expression declaration does not allow
+character data, but the inner elements can be described by the more powerful
+means of regular expressions.
+</para>
+
+ <para>
+A declaration for mixed content looks as follows:
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> (#PCDATA | <replaceable>element<subscript>1</subscript></replaceable> | ... | <replaceable>element<subscript>n</subscript></replaceable> )*>
+</programlisting>
+
+or if you do not want to allow any inner element, simply
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> (#PCDATA)>
+</programlisting>
+</para>
+
+
+<blockquote>
+ <title>Example</title>
+ <para>
+If element type <literal>q</literal> is declared as
+
+<programlisting>
+<![CDATA[<!ELEMENT q (#PCDATA | r | s)*>]]>
+</programlisting>
+
+this is a legal instance:
+
+<programlisting>
+<![CDATA[<q>This is character data<r></r>with <s></s>inner elements</q>]]>
+</programlisting>
+
+But this is illegal because <literal>t</literal> has not been enumerated in the
+declaration:
+
+<programlisting>
+<![CDATA[<q>This is character data<r></r>with <t></t>inner elements</q>]]>
+</programlisting>
+</para>
+ </blockquote>
+
+ <para>
+The other form uses a regular expression to describe the possible contents:
+
+<programlisting>
+<!ELEMENT <replaceable>name</replaceable> <replaceable>regexp</replaceable>>
+</programlisting>
+
+The following well-known regexp operators are allowed:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal><replaceable>element-name</replaceable></literal>
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> ,</literal> ... <literal>, <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal>(<replaceable>subexpr<subscript>1</subscript></replaceable> |</literal> ... <literal>| <replaceable>subexpr<subscript>n</subscript></replaceable> )</literal>
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal><replaceable>subexpr</replaceable>*</literal>
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal><replaceable>subexpr</replaceable>+</literal>
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal><replaceable>subexpr</replaceable>?</literal>
+</para>
+ </listitem>
+ </itemizedlist>
+
+The <literal>,</literal> operator indicates a sequence of sub-models, the
+<literal>|</literal> operator describes alternative sub-models. The
+<literal>*</literal> indicates zero or more repetitions, and
+<literal>+</literal> one or more repetitions. Finally, <literal>?</literal> can
+be used for optional sub-models. As atoms the regexp can contain names of
+elements; note that it is not allowed to include <literal>#PCDATA</literal>.
+</para>
+
+ <para>
+The exact syntax of the regular expressions is rather strange. This can be
+explained best by a list of constraints:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+The outermost expression must not be
+<literal><replaceable>element-name</replaceable></literal>.
+</para>
+ <para><emphasis>Illegal:</emphasis>
+<literal><![CDATA[<!ELEMENT x y>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y)>]]></literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+For the unary operators <literal><replaceable>subexpr</replaceable>*</literal>,
+<literal><replaceable>subexpr</replaceable>+</literal>, and
+<literal><replaceable>subexpr</replaceable>?</literal>, the
+<literal><replaceable>subexpr</replaceable></literal> must not itself be a
+unary operator expression.
+</para>
+ <para><emphasis>Illegal:</emphasis>
+<literal><![CDATA[<!ELEMENT x y**>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y*)*>]]></literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+Between <literal>)</literal> and one of the unary operators
+<literal>*</literal>, <literal>+</literal>, or <literal>?</literal>, there must
+not be whitespace.</para>
+ <para><emphasis>Illegal:</emphasis>
+<literal><![CDATA[<!ELEMENT x (y|z) *>]]></literal>; this must be written as
+<literal><![CDATA[<!ELEMENT x (y|z)*>]]></literal>.</para>
+ </listitem>
+ <listitem><para>There is the additional constraint that the
+right parenthesis must be contained in the same entity as the left parenthesis;
+see the section about parsed entities below.</para>
+ </listitem>
+ </itemizedlist>
+
+</para>
+
+<para>
+Note that there is another restriction: regular expressions must be
+deterministic. This means that the parser must be able to see by looking at the
+next token which alternative is actually used, or whether the repetition
+stops. The reason for this is simply compatibility with SGML (there is no
+intrinsic reason for this rule; XML can live without this restriction).
+</para>
+
+ <blockquote>
+ <title>Example</title>
+ <para>
+The elements are declared as follows:
+
+<programlisting>
+<![CDATA[<!ELEMENT q (r?, (s | t)+)>
+<!ELEMENT r (#PCDATA)>
+<!ELEMENT s EMPTY>
+<!ELEMENT t (q | r)>
+]]></programlisting>
+
+This is a legal instance:
+
+<programlisting>
+<![CDATA[<q><r>Some characters</r><s/></q>]]>
+</programlisting>
+
+(Note: <literal><s/></literal> is an abbreviation for
+<literal><s></s></literal>.)
+
+It would be illegal to leave <literal><![CDATA[<s/>]]></literal> out because at
+least one instance of <literal>s</literal> or <literal>t</literal> must be
+present. It would be illegal, too, if characters existed outside the
+<literal>r</literal> element; the only exception is white space. -- This is
+legal, too:
+
+<programlisting>
+<![CDATA[<q><s/><t><q><s/></q></t></q>]]>
+</programlisting>
+</para>
+ </blockquote>
+
+ </sect2>
+
+ <!-- ======================================== -->
+
+ <sect2>
+ <title>Attribute lists and ATTLIST declarations</title>
+ <para>
+Elements may have attributes. These are put into the start tag of an element as
+follows:
+
+<programlisting>
+<<replaceable>element-name</replaceable> <replaceable>attribute<subscript>1</subscript></replaceable>="<replaceable>value<subscript>1</subscript></replaceable>" ... <replaceable>attribute<subscript>n</subscript></replaceable>="<replaceable>value<subscript>n</subscript></replaceable>">
+</programlisting>
+
+Instead of
+<literal>"<replaceable>value<subscript>k</subscript></replaceable>"</literal>
+it is also possible to use single quotes as in
+<literal>'<replaceable>value<subscript>k</subscript></replaceable>'</literal>.
+Note that you cannot use double quotes literally within the value of the
+attribute if double quotes are the delimiters; the same applies to single
+quotes. You can generally not use < and & as characters in attribute
+values. It is possible to include the paraphrases &lt;, &gt;,
+&amp;, &apos;, and &quot; (and any other reference to a general
+entity as long as the entity is not defined by an external file) as well as
+&#<replaceable>n</replaceable>;.
+</para>
+
+ <para>
+Before you can use an attribute you must declare it. An ATTLIST declaration
+looks as follows:
+
+<programlisting>
+<!ATTLIST <replaceable>element-name</replaceable>
+ <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
+ ...
+ <replaceable>attribute-name</replaceable> <replaceable>attribute-type</replaceable> <replaceable>attribute-default</replaceable>
+>
+</programlisting>
+
+There are a lot of types, but most important are:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>CDATA</literal>: Every string is allowed as attribute value.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>NMTOKEN</literal>: Every nametoken is allowed as attribute
+value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary
+order.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>NMTOKENS</literal>: A space-separated list of nametokens is allowed as
+attribute value.
+</para>
+ </listitem>
+ </itemizedlist>
+
+The most interesting default declarations are:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>#REQUIRED</literal>: The attribute must be specified.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>#IMPLIED</literal>: The attribute can be specified but also can be
+left out. The application can find out whether the attribute was present or
+not.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>"<replaceable>value</replaceable>"</literal> or
+<literal>'<replaceable>value</replaceable>'</literal>: This particular value is
+used as default if the attribute is omitted in the element.
+</para>
+ </listitem>
+ </itemizedlist>
+</para>
+
+ <blockquote>
+ <title>Example</title>
+ <para>
+This is a valid attribute declaration for element type <literal>r</literal>:
+
+<programlisting>
+<![CDATA[<!ATTLIST r
+ x CDATA #REQUIRED
+ y NMTOKEN #IMPLIED
+ z NMTOKENS "one two three">
+]]></programlisting>
+
+This means that <literal>x</literal> is a required attribute that cannot be
+left out, while <literal>y</literal> and <literal>z</literal> are optional. The
+XML parser tells the application whether <literal>y</literal> is present or
+not, but if <literal>z</literal> is missing the default value
+"one two three" is returned automatically.
+</para>
+
+ <para>
+This is a valid example of these attributes:
+
+<programlisting>
+<![CDATA[<r x="He said: &quot;I don't like quotes!&quot;" y='1'>]]>
+</programlisting>
+</para>
+ </blockquote>
+
+ </sect2>
+
+ <sect2>
+ <title>Parsed entities</title>
+ <para>
+Elements describe the logical structure of the document, while
+<emphasis>entities</emphasis> determine the physical structure. Entities are
+the pieces of text the parser operates on, mostly files and macros. Entities
+may be <emphasis>parsed</emphasis> in which case the parser reads the text and
+interprets it as XML markup, or <emphasis>unparsed</emphasis> which simply
+means that the data of the entity has a foreign format (e.g. a GIF icon).
+</para>
+
+ <para>If the parsed entity is going to be used as part of the DTD, it
+is called a <emphasis>parameter entity</emphasis>. You can declare a parameter
+entity with a fixed text as content by:
+
+<programlisting>
+<!ENTITY % <replaceable>name</replaceable> "<replaceable>value</replaceable>">
+</programlisting>
+
+Within the DTD, you can <emphasis>refer to</emphasis> this entity, i.e. read
+the text of the entity, by:
+
+<programlisting>
+%<replaceable>name</replaceable>;
+</programlisting>
+
+Such entities behave like macros, i.e. when they are referred to, the
+macro text is inserted and read instead of the original text.
+
+<blockquote>
+ <title>Example</title>
+ <para>
+For example, you can declare two elements with the same content model by:
+
+<programlisting>
+<![CDATA[
+<!ENTITY % model "a | b | c">
+<!ELEMENT x (%model;)>
+<!ELEMENT y (%model;)>
+]]>
+</programlisting>
+
+</para>
+ </blockquote>
+
+If the contents of the entity are given as string constant, the entity is
+called an <emphasis>internal</emphasis> entity. It is also possible to name a
+file to be used as content (an <emphasis>external</emphasis> entity):
+
+<programlisting>
+<!ENTITY % <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>">
+</programlisting>
+
+There are some restrictions for parameter entities:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+If the internal parameter entity contains the first token of a declaration
+(i.e. <literal><!</literal>), it must also contain the last token of the
+declaration, i.e. the <literal>></literal>. This means that the entity
+either contains a whole number of complete declarations, or some text from the
+middle of one declaration.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c)>">
+<!ELEMENT x %e;
+]]></programlisting> Because <literal><!</literal> is contained in the main
+entity, and the corresponding <literal>></literal> is contained in the
+entity <literal>e</literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+If the internal parameter entity contains a left parenthesis, it must also
+contain the corresponding right parenthesis.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c">
+<!ELEMENT x %e;)>
+]]></programlisting> Because <literal>(</literal> is contained in the entity
+<literal>e</literal>, and the corresponding <literal>)</literal> is
+contained in the main entity.</para>
+ </listitem>
+ <listitem>
+ <para>
+When reading text from an entity, the parser automatically inserts one space
+character before the entity text and one space character after the entity
+text. However, this rule is not applied within the definition of another
+entity.</para>
+<para><emphasis>Legal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % suffix "gif">
+<!ENTITY iconfile 'icon.%suffix;'>
+]]></programlisting> Because <literal>%suffix;</literal> is referenced within
+the definition text for <literal>iconfile</literal>, no additional spaces are
+added.
+</para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % suffix "test">
+<!ELEMENT x.%suffix; ANY>
+]]></programlisting>
+Because <literal>%suffix;</literal> is referenced outside the definition
+text of another entity, the parser replaces <literal>%suffix;</literal> by
+<literal><replaceable>space</replaceable>test<replaceable>space</replaceable></literal>. </para>
+<para><emphasis>Illegal:</emphasis>
+<programlisting>
+<![CDATA[
+<!ENTITY % e "(a | b | c)">
+<!ELEMENT x %e;*>
+]]></programlisting> Because the parser inserts a space after the entity text,
+there is whitespace between <literal>)</literal> and <literal>*</literal>,
+which is illegal.</para>
+ </listitem>
+ <listitem>
+ <para>
+An external parameter entity must always consist of a whole number of complete
+declarations.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+In the internal subset of the DTD, a reference to a parameter entity (internal
+or external) is only allowed at positions where a new declaration can start.
+</para>
+ </listitem>
+ </itemizedlist>
+</para>
+
+ <para>
+If the parsed entity is going to be used in the document instance, it is called
+a <emphasis>general entity</emphasis>. Such entities can be used as
+abbreviations for frequent phrases, or to include external files. Internal
+general entities are declared as follows:
+
+<programlisting>
+<!ENTITY <replaceable>name</replaceable> "<replaceable>value</replaceable>">
+</programlisting>
+
+External general entities are declared this way:
+
+<programlisting>
+<!ENTITY <replaceable>name</replaceable> SYSTEM "<replaceable>file name</replaceable>">
+</programlisting>
+
+References to general entities are written as:
+
+<programlisting>
+&<replaceable>name</replaceable>;
+</programlisting>
+
+The main difference between parameter and general entities is that the former
+are only recognized in the DTD and that the latter are only recognized in the
+document instance. As the DTD is parsed before the document, the parameter
+entities are expanded first; for example it is possible to use the content of a
+parameter entity as the name of a general entity:
+<literal>&#38;%name;;</literal><footnote><para>This construct is only
+allowed within the definition of another entity; otherwise extra spaces would
+be added (as explained above). Such indirection is not recommended.
+</para>
+<para>Complete example:
+<programlisting>
+<![CDATA[
+<!ENTITY % variant "a"> <!-- or "b" -->
+<!ENTITY text-a "This is text A.">
+<!ENTITY text-b "This is text B.">
+<!ENTITY text "&text-%variant;;">
+]]></programlisting>
+You can now write <literal>&text;</literal> in the document instance, and
+depending on the value of <literal>variant</literal> either
+<literal>text-a</literal> or <literal>text-b</literal> is inserted.</para>
+</footnote>.
+</para>
+ <para>
+General entities must respect the element hierarchy. This means that there must
+be an end tag for every start tag in the entity value, and that end tags
+without corresponding start tags are not allowed.
+</para>
+
+ <blockquote>
+ <title>Example</title>
+ <para>
+If the author of a document changes sometimes, it is worthwhile to set up a
+general entity containing the names of the authors. If the author changes, you
+need only to change the definition of the entity, and do not need to check all
+occurrences of authors' names:
+
+<programlisting>
+<![CDATA[
+<!ENTITY authors "Gerd Stolpmann">
+]]>
+</programlisting>
+
+In the document text, you can now refer to the author names by writing
+<literal>&authors;</literal>.
+</para>
+
+ <para>
+<emphasis>Illegal:</emphasis>
+The following two entities are illegal because the elements in the definition
+do not nest properly:
+
+<programlisting>
+<![CDATA[
+<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
+<!ENTITY nonsense "<a></b>">
+]]></programlisting>
+</para>
+ </blockquote>
+
+ <para>
+Earlier in this introduction we explained that there are substitutes for
+reserved characters: &lt;, &gt;, &amp;, &apos;, and
+&quot;. These are simply predefined general entities; note that they are
+the only predefined entities. It is allowed to define these entities again
+as long as the meaning is unchanged.
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Notations and unparsed entities</title>
+ <para>
+Unparsed entities have a foreign format and can thus not be read by the XML
+parser. Unparsed entities are always external. The format of an unparsed entity
+must have been declared; such a format is called a
+<emphasis>notation</emphasis>. The entity can then be declared by referring to
+this notation. As unparsed entities do not contain XML text, it is not possible
+to include them directly into the document; you can only declare attributes
+such that names of unparsed entities are acceptable values.
+</para>
+
+ <para>
+As you can see, unparsed entities are too complicated to serve any practical
+purpose. It is almost always better to simply pass the name of the data file as
+a normal attribute value, and let the application recognize and process the
+foreign format.
+</para>
+ </sect2>
+
+ </sect1>
+
+
+ <!-- ================================================== -->
+
+
+ <sect1 id="sect.readme.dtd">
+ <title>A complete example: The <emphasis>readme</emphasis> DTD</title>
+ <para>
+The reason for <emphasis>readme</emphasis> was that I often wrote two versions
+of files such as README and INSTALL which explain aspects of a distributed
+software archive; one version was ASCII-formatted, the other was written in
+HTML. Maintaining both versions means double the amount of work, and changes
+of one version may be forgotten in the other version. To improve this situation
+I invented the <emphasis>readme</emphasis> DTD which allows me to maintain only
+one source written as XML document, and to generate the ASCII and the HTML
+version from it.
+</para>
+
+ <para>
+In this section, I explain only the DTD. The <emphasis>readme</emphasis> DTD is
+contained in the &markup; distribution together with the two converters to
+produce ASCII and HTML. Another <link
+linkend="sect.readme.to-html">section</link> of this manual describes the HTML
+converter.
+</para>
+
+ <para>
+The documents have a simple structure: There are up to three levels of nested
+sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The
+outermost element has usually the type <literal>readme</literal>, it is
+declared by
+
+<programlisting>
+<![CDATA[<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+ title CDATA #REQUIRED>
+]]></programlisting>
+
+This means that this element contains one or more sections of the first level
+(element type <literal>sect1</literal>), and that the element has a required
+attribute <literal>title</literal> containing character data (CDATA). Note that
+<literal>readme</literal> elements must not contain text data.
+</para>
+
+ <para>
+The three levels of sections are declared as follows:
+
+<programlisting>
+<![CDATA[<!ELEMENT sect1 (title,(sect2|p|ul)+)>
+
+<!ELEMENT sect2 (title,(sect3|p|ul)+)>
+
+<!ELEMENT sect3 (title,(p|ul)+)>
+]]></programlisting>
+
+Every section has a <literal>title</literal> element as first subelement. After
+the title an arbitrary but non-empty sequence of inner sections, paragraphs and
+item lists follows. Note that the inner sections must belong to the next higher
+section level; <literal>sect3</literal> elements must not contain inner
+sections because there is no next higher level.
+</para>
+
+ <para>
+Obviously, all three declarations allow paragraphs (<literal>p</literal>) and
+item lists (<literal>ul</literal>). The definition can be simplified at this
+point by using a parameter entity:
+
+<programlisting>
+<![CDATA[<!ENTITY % p.like "p|ul">
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+]]></programlisting>
+
+Here, the entity <literal>p.like</literal> is nothing but a macro abbreviating
+the same sequence of declarations; if new elements on the same level as
+<literal>p</literal> and <literal>ul</literal> are later added, it is
+sufficient only to change the entity definition. Note that there are some
+restrictions on the usage of entities in this context; most important, entities
+containing a left parenthesis must also contain the corresponding right
+parenthesis.
+</para>
+
+ <para>
+Note that the entity <literal>p.like</literal> is a
+<emphasis>parameter</emphasis> entity, i.e. the ENTITY declaration contains a
+percent sign, and the entity is referred to by
+<literal>%p.like;</literal>. This kind of entity must be used to abbreviate
+parts of the DTD; the <emphasis>general</emphasis> entities declared without
+percent sign and referred to as <literal>&amp;name;</literal> are not allowed
+in this context.
+</para>
+
+ <para>
+The <literal>title</literal> element specifies the title of the section in
+which it occurs. The title is given as character data, optionally interspersed
+with line breaks (<literal>br</literal>):
+
+<programlisting>
+<![CDATA[<!ELEMENT title (#PCDATA|br)*>
+]]></programlisting>
+
+Compared with the <literal>title</literal> <emphasis>attribute</emphasis> of
+the <literal>readme</literal> element, this element allows inner markup
+(i.e. <literal>br</literal>) while attribute values do not: It is an error if
+an attribute value contains the left angle bracket &lt; literally, so it
+is impossible to include inner elements.
+</para>
+
+ <para>
+The paragraph element <literal>p</literal> has a structure similar to
+<literal>title</literal>, but it allows more inner elements:
+
+<programlisting>
+<![CDATA[<!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT p (#PCDATA|%text;)*>
+]]></programlisting>
+
+Line breaks do not have inner structure, so they are declared as being empty:
+
+<programlisting>
+<![CDATA[<!ELEMENT br EMPTY>
+]]></programlisting>
+
+This means that really nothing is allowed within <literal>br</literal>; you
+must always write <literal><![CDATA[<br></br>]]></literal> or abbreviated
+<literal><![CDATA[<br/>]]></literal>.
+</para>
+
+ <para>
+Code samples should be marked up by the <literal>code</literal> tag; emphasized
+text can be indicated by <literal>em</literal>:
+
+<programlisting>
+<![CDATA[<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+]]></programlisting>
+
+That <literal>code</literal> elements are not allowed to contain further markup
+while <literal>em</literal> elements do is a design decision by the author of
+the DTD.
+</para>
+
+ <para>
+Unordered lists simply consist of one or more list items, and a list item may
+contain paragraph-level material:
+
+<programlisting>
+<![CDATA[<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+]]></programlisting>
+
+Footnotes are described by the text of the note; this text may contain
+text-level markup. There is no mechanism to describe the numbering scheme of
+footnotes, or to specify how footnote references are printed.
+
+<programlisting>
+<![CDATA[<!ELEMENT footnote (#PCDATA|%text;)*>
+]]></programlisting>
+
+Hyperlinks are written as in HTML. The anchor tag contains the text describing
+where the link points to, and the <literal>href</literal> attribute is the
+pointer (as URL). There is no way to describe locations of "hash marks". If the
+link refers to another <emphasis>readme</emphasis> document, the attribute
+<literal>readmeref</literal> should be used instead of <literal>href</literal>.
+The reason is that the converted document has usually a different system
+identifier (file name), and the link to a converted document must be
+converted, too.
+
+<programlisting>
+<![CDATA[<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a
+ href CDATA #IMPLIED
+ readmeref CDATA #IMPLIED
+>
+]]></programlisting>
+
+Note that although it is only sensible to specify one of the two attributes,
+the DTD has no means to express this restriction.
+</para>
+
+<para>
+So far the DTD. Finally, here is a document for it:
+
+<programlisting>
+<![CDATA[
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd">
+<readme title="How to use the readme converters">
+<sect1>
+ <title>Usage</title>
+ <p>
+ The <em>readme</em> converter is invoked on the command line by:
+ </p>
+ <p>
+ <code>readme [ -text | -html ] input.xml</code>
+ </p>
+ <p>
+ Here a list of options:
+ </p>
+ <ul>
+ <li>
+ <p><code>-text</code>: specifies that ASCII output should be produced</p>
+ </li>
+ <li>
+ <p><code>-html</code>: specifies that HTML output should be produced</p>
+ </li>
+ </ul>
+ <p>
+ The input file must be given on the command line. The converted output is
+ printed to <em>stdout</em>.
+ </p>
+</sect1>
+<sect1>
+ <title>Author</title>
+ <p>
+ The program has been written by
+ <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
+ </p>
+</sect1>
+</readme>
+]]></programlisting>
+
+</para>
+
+
+ </sect1>
+ </chapter>
+
+<!-- ********************************************************************** -->
+
+ <chapter>
+ <title>Using &markup;</title>
+
+ <sect1>
+ <title>Validation</title>
+ <para>
+The parser can be used to <emphasis>validate</emphasis> a document. This means
+that all the constraints that must hold for a valid document are actually
+checked. Validation is the default mode of &markup;, i.e. every document is
+validated while it is being parsed.
+</para>
+
+ <para>
+In the <literal>examples</literal> directory of the distribution you find the
+<literal>pxpvalidate</literal> application. It is invoked in the following way:
+
+<programlisting>
+pxpvalidate [ -wf ] <replaceable>file</replaceable>...
+</programlisting>
+
+The files mentioned on the command line are validated, and all warnings and
+error messages are printed to stderr.
+</para>
+
+ <para>
+The -wf switch modifies the behaviour such that a well-formedness parser is
+simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the
+DTD are ignored, and only the ENTITY declarations will take effect. This mode
+is intended for documents lacking a DTD. Please note that the parser still
+scans the DTD fully and will report all errors in the DTD; such checks are not
+required by a well-formedness parser.
+</para>
+
+ <para>
+The <literal>pxpvalidate</literal> application is the simplest sensible program
+using &markup;, and you may consider it the "hello world" program.
+</para>
+ </sect1>
+
+
+ <!-- ================================================== -->
+
+
+ <sect1>
+ <title>How to parse a document from an application</title>
+ <para>
+Let me first give a rough overview of the object model of the parser. The
+following items are represented by objects:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<emphasis>Documents:</emphasis> The document representation is more or less the
+anchor for the application; all accesses to the parsed entities start here. It
+is described by the class <literal>document</literal> contained in the module
+<literal>Pxp_document</literal>. You can get some global information, such
+as the XML declaration the document begins with, the DTD of the document,
+global processing instructions, and most important, the document tree.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<emphasis>The contents of documents:</emphasis> The contents have the structure
+of a tree: Elements contain other elements and text<footnote><para>Elements may
+also contain processing instructions. Unlike other document models, &markup;
+separates processing instructions from the rest of the text and provides a
+second interface to access them (method <literal>pinstr</literal>). However,
+there is a parser option (<literal>enable_pinstr_nodes</literal>) which changes
+the behaviour of the parser such that extra nodes for processing instructions
+are included into the tree.</para>
+<para>Furthermore, the tree does normally not contain nodes for XML comments;
+they are ignored by default. Again, there is an option
+(<literal>enable_comment_nodes</literal>) changing this.</para>
+</footnote>.
+
+The common type to represent both kinds of content is <literal>node</literal>
+which is a class type that unifies the properties of elements and character
+data. Every node has a list of children (which is empty if the element is empty
+or the node represents text); nodes may have attributes; nodes have always text
+contents. There are two implementations of <literal>node</literal>, the class
+<literal>element_impl</literal> for elements, and the class
+<literal>data_impl</literal> for text data. You find these classes and class
+types in the module <literal>Pxp_document</literal>, too.
+</para>
+
+ <para>
+Note that attribute lists are represented by non-class values.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<emphasis>The node extension:</emphasis> For advanced usage, every node of the
+document may have an associated <emphasis>extension</emphasis> which is simply
+a second object. This object must have the three methods
+<literal>clone</literal>, <literal>node</literal>, and
+<literal>set_node</literal> as bare minimum, but you are free to add methods as
+you want. This is the preferred way to add functionality to the document
+tree<footnote><para>Due to the typing system it is more or less impossible to
+derive recursive classes in O'Caml. To get around this, it is common practice
+to put the modifiable or extensible part of recursive objects into parallel
+objects.</para> </footnote>. The class type <literal>extension</literal> is
+defined in <literal>Pxp_document</literal>, too.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<emphasis>The DTD:</emphasis> Sometimes it is necessary to access the DTD of a
+document; the average application does not need this feature. The class
+<literal>dtd</literal> describes DTDs, and makes it possible to get
+representations of element, entity, and notation declarations as well as
+processing instructions contained in the DTD. This class, and
+<literal>dtd_element</literal>, <literal>dtd_notation</literal>, and
+<literal>proc_instruction</literal> can be found in the module
+<literal>Pxp_dtd</literal>. There are a couple of classes representing
+different kinds of entities; these can be found in the module
+<literal>Pxp_entity</literal>.
+</para>
+ </listitem>
+ </itemizedlist>
+
+Additionally, the following modules play a role:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<emphasis>Pxp_yacc:</emphasis> Here the main parsing functions such as
+<literal>parse_document_entity</literal> are located. Some additional types and
+functions allow the parser to be configured in a non-standard way.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<emphasis>Pxp_types:</emphasis> This is a collection of basic types and
+exceptions.
+</para>
+ </listitem>
+ </itemizedlist>
+
+There are some further modules that are needed internally but are not part of
+the API.
+</para>
+
+ <para>
+Let the document to be parsed be stored in a file called
+<literal>doc.xml</literal>. The parsing process is started by calling the
+function
+
+<programlisting>
+val parse_document_entity : config -> source -> 'ext spec -> 'ext document
+</programlisting>
+
+defined in the module <literal>Pxp_yacc</literal>. The first argument
+specifies some global properties of the parser; it is recommended to start with
+the <literal>default_config</literal>. The second argument determines where the
+document to be parsed comes from; this may be a file, a channel, or an entity
+ID. To parse <literal>doc.xml</literal>, it is sufficient to pass
+<literal>from_file "doc.xml"</literal>.
+</para>
+
+ <para>
+The third argument passes the object specification to use. Roughly
+speaking, it determines which classes implement the node objects of which
+element types, and which extensions are to be used. The <literal>'ext</literal>
+polymorphic variable is the type of the extension. For the moment, let us
+simply pass <literal>default_spec</literal> as this argument, and ignore it.
+</para>
+
+ <para>
+So the following expression parses <literal>doc.xml</literal>:
+
+<programlisting>
+open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec
+</programlisting>
+
+Note that <literal>default_config</literal> implies that warnings are collected
+but not printed. Errors raise one of the exceptions defined in
+<literal>Pxp_types</literal>; to get readable errors and warnings catch the
+exceptions as follows:
+
+<programlisting>
+<![CDATA[class warner =
+ object
+ method warn w =
+ print_endline ("WARNING: " ^ w)
+ end
+;;
+
+try
+ let config = { default_config with warner = new warner } in
+ let d = parse_document_entity config (from_file "doc.xml") default_spec
+ in
+ ...
+with
+ e ->
+ print_endline (Pxp_types.string_of_exn e)
+]]></programlisting>
+
+Now <literal>d</literal> is an object of the <literal>document</literal>
+class. If you want the node tree, you can get the root element by
+
+<programlisting>
+let root = d # root
+</programlisting>
+
+and if you would rather like to access the DTD, determine it by
+
+<programlisting>
+let dtd = d # dtd
+</programlisting>
+
+As it is more interesting, let us investigate the node tree now. Given the root
+element, it is possible to recursively traverse the whole tree. The children of
+a node <literal>n</literal> are returned by the method
+<literal>sub_nodes</literal>, and the type of a node is returned by
+<literal>node_type</literal>. This function traverses the tree, and prints the
+type of each node:
+
+<programlisting>
+<![CDATA[let rec print_structure n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element name ->
+ print_endline ("Element of type " ^ name);
+ let children = n # sub_nodes in
+ List.iter print_structure children
+ | T_data ->
+ print_endline "Data"
+ | _ ->
+ (* Other node types are not possible unless the parser is configured
+ differently.
+ *)
+ assert false
+]]></programlisting>
+
+You can call this function by
+
+<programlisting>
+print_structure root
+</programlisting>
+
+The type returned by <literal>node_type</literal> is either <literal>T_element
+name</literal> or <literal>T_data</literal>. The <literal>name</literal> of the
+element type is the string included in the angle brackets. Note that only
+elements have children; data nodes are always leaves of the tree.
+</para>
+
+ <para>
+There are some more methods in order to access a parsed node tree:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>n # parent</literal>: Returns the parent node, or raises
+<literal>Not_found</literal> if the node is already the root.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>n # root</literal>: Returns the root of the node tree.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>n # attribute a</literal>: Returns the value of the attribute with
+name <literal>a</literal>. The method returns a value for every
+<emphasis>declared</emphasis> attribute, independently of whether the attribute
+instance is defined or not. If the attribute is not declared,
+<literal>Not_found</literal> will be raised. (In well-formedness mode, every
+attribute is considered as being implicitly declared with type
+<literal>CDATA</literal>.)
+</para>
+
+<para>
+The following return values are possible: <literal>Value s</literal>,
+<literal>Valuelist sl</literal>, and <literal>Implied_value</literal>.
+The first two value types indicate that the attribute value is available,
+either because there is a definition
+<literal><replaceable>a</replaceable>="<replaceable>value</replaceable>"</literal>
+in the XML text, or because there is a default value (declared in the
+DTD). Only if both the instance definition and the default declaration are
+missing, the latter value <literal>Implied_value</literal> will be returned.
+</para>
+
+<para>
+In the DTD, every attribute is typed. There are single-value types (CDATA, ID,
+IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes
+<literal>Value s</literal> back, where <literal>s</literal> is the normalized
+string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS)
+represent list values, and the parser splits the XML literal into several
+tokens and returns these tokens as <literal>Valuelist sl</literal>.
+</para>
+
+<para>
+Normalization means that entity references (the
+<literal>&amp;<replaceable>name</replaceable>;</literal> tokens) and
+character references
+(<literal>&amp;#<replaceable>number</replaceable>;</literal>) are replaced
+by the text they represent, and that white space characters are converted into
+plain spaces.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>n # data</literal>: Returns the character data contained in the
+node. For data nodes, the meaning is obvious as this is the main content of
+data nodes. For element nodes, this method returns the concatenated contents of
+all inner data nodes.
+</para>
+ <para>
+Note that entity references included in the text are resolved while they are
+being parsed; for example the text <![CDATA["a &lt;&gt; b"]]> will be returned
+as <![CDATA["a <> b"]]> by this method. Spaces of data nodes are always
+preserved. Newlines are preserved, but always converted to \n characters even
+if newlines are encoded as \r\n or \r. Normally you will never see two adjacent
+data nodes because the parser collapses all data material at one location into
+one node. (However, if you create your own tree or transform the parsed tree,
+it is possible to have adjacent data nodes.)
+</para>
+ <para>
+Note that elements that do <emphasis>not</emphasis> allow #PCDATA as content
+will not have data nodes as children. This means that spaces and newlines, the
+only character material allowed for such elements, are silently dropped.
+</para>
+ </listitem>
+ </itemizedlist>
+
+For example, if the task is to print all contents of elements with type
+"valuable" whose attribute "priority" is "1", this function can help:
+
+<programlisting>
+<![CDATA[let rec print_valuable_prio1 n =
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+      print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ | _ ->
+ assert false
+]]></programlisting>
+
+You can call this function by:
+
+<programlisting>
+print_valuable_prio1 root
+</programlisting>
+
+If you like a DSSSL-like style, you can make the function
+<literal>process_children</literal> explicit:
+
+<programlisting>
+<![CDATA[let rec print_valuable_prio1 n =
+
+ let process_children n =
+ let children = n # sub_nodes in
+ List.iter print_valuable_prio1 children
+ in
+
+ let ntype = n # node_type in
+ match ntype with
+ T_element "valuable" when n # attribute "priority" = Value "1" ->
+ print_endline "Valuable node with priority 1 found:";
+ print_endline (n # data)
+ | (T_element _ | T_data) ->
+ process_children n
+ | _ ->
+ assert false
+]]></programlisting>
+
+So far, O'Caml is now a simple "style-sheet language": You can form a big
+"match" expression to distinguish between all significant cases, and provide
+different reactions on different conditions. But this technique has
+limitations; the "match" expression tends to get larger and larger, and it is
+difficult to store intermediate values as there is only one big
+recursion. Alternatively, it is also possible to represent the various cases as
+classes, and to use dynamic method lookup to find the appropriate class. The
+next section explains this technique in detail.
+
+</para>
+ </sect1>
+
+
+ <!-- ================================================== -->
+
+
+ <sect1>
+ <title>Class-based processing of the node tree</title>
+ <para>
+By default, the parsed node tree consists of objects of the same class; this is
+a good design as long as you want only to access selected parts of the
+document. For complex transformations, it may be better to use different
+classes for objects describing different element types.
+</para>
+
+ <para>
+For example, if the DTD declares the element types <literal>a</literal>,
+<literal>b</literal>, and <literal>c</literal>, and if the task is to convert
+an arbitrary document into a printable format, the idea is to define for every
+element type a separate class that has a method <literal>print</literal>. The
+classes are <literal>eltype_a</literal>, <literal>eltype_b</literal>, and
+<literal>eltype_c</literal>, and every class implements
+<literal>print</literal> such that elements of the type corresponding to the
+class are converted to the output format.
+</para>
+
+ <para>
+The parser supports such a design directly. As it is impossible to derive
+recursive classes in O'Caml<footnote><para>The problem is that the subclass is
+usually not a subtype in this case because O'Caml has a contravariant subtyping
+rule. </para> </footnote>, the specialized element classes cannot be formed by
+simply inheriting from the built-in classes of the parser and adding methods
+for customized functionality. To get around this limitation, every node of the
+document tree is represented by <emphasis>two</emphasis> objects, one called
+"the node" and containing the recursive definition of the tree, one called "the
+extension". Every node object has a reference to the extension, and the
+extension has a reference to the node. The advantage of this model is that it
+is now possible to customize the extension without affecting the typing
+constraints of the recursive node definition.
+</para>
+
+ <para>
+Every extension must have the three methods <literal>clone</literal>,
+<literal>node</literal>, and <literal>set_node</literal>. The method
+<literal>clone</literal> creates a deep copy of the extension object and
+returns it; <literal>node</literal> returns the node object for this extension
+object; and <literal>set_node</literal> is used to tell the extension object
+which node is associated with it, this method is automatically called when the
+node tree is initialized. The following definition is a good starting point
+for these methods; usually <literal>clone</literal> must be further refined
+when instance variables are added to the class:
+
+<programlisting>
+<![CDATA[class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ end
+]]>
+</programlisting>
+
+This part of the extension is usually the same for all classes, so it is a good
+idea to consider <literal>custom_extension</literal> as the super-class of the
+further class definitions. Continuing the example above, we can define the
+element type classes as follows:
+
+<programlisting>
+<![CDATA[class virtual custom_extension =
+ object (self)
+ ... clone, node, set_node defined as above ...
+
+ method virtual print : out_channel -> unit
+ end
+
+class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch = ...
+ end
+]]></programlisting>
+
+The method <literal>print</literal> can now be implemented for every element
+type separately. Note that you get the associated node by invoking
+
+<programlisting>
+self # node
+</programlisting>
+
+and you get the extension object of a node <literal>n</literal> by writing
+
+<programlisting>
+n # extension
+</programlisting>
+
+It is guaranteed that
+
+<programlisting>
+self # node # extension == self
+</programlisting>
+
+always holds.
+</para>
+
+ <para>Here are sample definitions of the <literal>print</literal>
+methods:
+
+<programlisting><![CDATA[
+class eltype_a =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Nodes <a>...</a> are only containers: *)
+ output_string ch "(";
+ List.iter
+ (fun n -> n # extension # print ch)
+ (self # node # sub_nodes);
+ output_string ch ")";
+ end
+
+class eltype_b =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the value of the CDATA attribute "print": *)
+ match self # node # attribute "print" with
+ Value s -> output_string ch s
+ | Implied_value -> output_string ch "<missing>"
+ | Valuelist l -> assert false
+ (* not possible because the att is CDATA *)
+ end
+
+class eltype_c =
+ object (self)
+ inherit custom_extension
+ method print ch =
+ (* Print the contents of this element: *)
+ output_string ch (self # node # data)
+ end
+
+class null_extension =
+ object (self)
+ inherit custom_extension
+ method print ch = assert false
+ end
+]]></programlisting>
+</para>
+
+
+ <para>
+The remaining task is to configure the parser such that these extension classes
+are actually used. Here another problem arises: It is not possible to
+dynamically select the class of an object to be created. As a workaround,
+&markup; allows the user to specify <emphasis>exemplar objects</emphasis> for
+the various element types; instead of creating the nodes of the tree by
+applying the <literal>new</literal> operator the nodes are produced by
+duplicating the exemplars. As object duplication preserves the class of the
+object, one can create fresh objects of every class for which previously an
+exemplar has been registered.
+</para>
+
+ <para>
+Exemplars are meant as objects without contents, the only interesting thing is
+that exemplars are instances of a certain class. The creation of an exemplar
+for an element node can be done by:
+
+<programlisting>
+let element_exemplar = new element_impl extension_exemplar
+</programlisting>
+
+And a data node exemplar is created by:
+
+<programlisting>
+let data_exemplar = new data_impl extension_exemplar
+</programlisting>
+
+The classes <literal>element_impl</literal> and <literal>data_impl</literal>
+are defined in the module <literal>Pxp_document</literal>. The constructors
+initialize the fresh objects as empty objects, i.e. without children, without
+data contents, and so on. The <literal>extension_exemplar</literal> is the
+initial extension object the exemplars are associated with.
+</para>
+
+ <para>
+Once the exemplars are created and stored somewhere (e.g. in a hash table), you
+can take an exemplar and create a concrete instance (with contents) by
+duplicating it. As a user of the parser you are normally not concerned with this
+as this is part of the internal logic of the parser, but as background knowledge
+it is worthwhile to mention that the two methods
+<literal>create_element</literal> and <literal>create_data</literal> actually
+perform the duplication of the exemplar for which they are invoked,
+additionally apply modifications to the clone, and finally return the new
+object. Moreover, the extension object is copied, too, and the new node object
+is associated with the fresh extension object. Note that this is the reason why
+every extension object must have a <literal>clone</literal> method.
+</para>
+
+ <para>
+The configuration of the set of exemplars is passed to the
+<literal>parse_document_entity</literal> function as third argument. In our
+example, this argument can be set up as follows:
+
+<programlisting>
+<![CDATA[let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl (new null_extension))
+ ~default_element_exemplar: (new element_impl (new null_extension))
+ ~element_alist:
+ [ "a", new element_impl (new eltype_a);
+ "b", new element_impl (new eltype_b);
+ "c", new element_impl (new eltype_c);
+ ]
+ ()
+]]></programlisting>
+
+The <literal>~element_alist</literal> function argument defines the mapping
+from element types to exemplars as an associative list. The argument
+<literal>~data_exemplar</literal> specifies the exemplar for data nodes, and
+the <literal>~default_element_exemplar</literal> is used whenever the parser
+finds an element type for which the associative list does not define an
+exemplar.
+</para>
+
+ <para>
+The configuration is now complete. You can still use the same parsing
+functions, only the initialization is a bit different. For example, call the
+parser by:
+
+<programlisting>
+let d = parse_document_entity default_config (from_file "doc.xml") spec
+</programlisting>
+
+Note that the resulting document <literal>d</literal> has a usable type;
+especially the <literal>print</literal> method we added is visible. So you can
+print your document by
+
+<programlisting>
+d # root # extension # print stdout
+</programlisting>
+</para>
+
+ <para>
+This object-oriented approach looks rather complicated; this is mostly caused
+by working around some problems of the strict typing system of O'Caml. Some
+auxiliary concepts such as extensions were needed, but the practical
+consequences are low. In the next section, one of the examples of the
+distribution is explained, a converter from <emphasis>readme</emphasis>
+documents to HTML.
+</para>
+
+ </sect1>
+
+
+ <!-- ================================================== -->
+
+
+ <sect1 id="sect.readme.to-html">
+ <title>Example: An HTML backend for the <emphasis>readme</emphasis>
+DTD</title>
+
+ <para>The converter from <emphasis>readme</emphasis> documents to HTML
+documents strictly follows the approach of defining one class per element
+type. The HTML code is similar to the <emphasis>readme</emphasis> source;
+because of this, most elements can be converted in the following way: Given the
+input element
+
+<programlisting>
+<![CDATA[<e>content</e>]]>
+</programlisting>
+
+the conversion text is the concatenation of a computed prefix, the recursively
+converted content, and a computed suffix.
+</para>
+
+ <para>
+Only one element type cannot be handled by this scheme:
+<literal>footnote</literal>. Footnotes are collected while they are found in
+the input text, and they are printed after the main text has been converted and
+printed.
+</para>
+
+ <sect2>
+ <title>Header</title>
+ <para>
+<programlisting>&readme.code.header;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Type declarations</title>
+ <para>
+<programlisting>&readme.code.footnote-printer;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>store</literal></title>
+ <para>
+The <literal>store</literal> is a container for footnotes. You can add a
+footnote by invoking <literal>alloc_footnote</literal>; the argument is an
+object of the class <literal>footnote_printer</literal>, the method returns the
+number of the footnote. The interesting property of a footnote is that it can
+be converted to HTML, so a <literal>footnote_printer</literal> is an object
+with a method <literal>footnote_to_html</literal>. The class
+<literal>footnote</literal> which is defined below has a compatible method
+<literal>footnote_to_html</literal> such that objects created from it can be
+used as <literal>footnote_printer</literal>s.
+</para>
+ <para>
+The other method, <literal>print_footnotes</literal>, prints the footnotes as
+a definition list, and is typically invoked after the main material of the page
+has already been printed. Every item of the list is printed by
+<literal>footnote_to_html</literal>.
+</para>
+
+ <para>
+<programlisting>&readme.code.store;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Function <literal>escape_html</literal></title>
+ <para>
+This function converts the characters &lt;, &gt;, &amp;, and " to their HTML
+representation. For example,
+<literal>escape_html "&lt;&gt;" = "&amp;lt;&amp;gt;"</literal>. Other
+characters are left unchanged.
+
+<programlisting>&readme.code.escape-html;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Virtual class <literal>shared</literal></title>
+ <para>
+This virtual class is the abstract superclass of the extension classes shown
+below. It defines the standard methods <literal>clone</literal>,
+<literal>node</literal>, and <literal>set_node</literal>, and declares the type
+of the virtual method <literal>to_html</literal>. This method recursively
+traverses the whole element tree, and prints the converted HTML code to the
+output channel passed as second argument. The first argument is the reference
+to the global <literal>store</literal> object which collects the footnotes.
+
+<programlisting>&readme.code.shared;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>only_data</literal></title>
+ <para>
+This class defines <literal>to_html</literal> such that the character data of
+the current node is converted to HTML. Note that <literal>self</literal> is an
+extension object, <literal>self # node</literal> is the node object, and
+<literal>self # node # data</literal> returns the character data of the node.
+
+<programlisting>&readme.code.only-data;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>readme</literal></title>
+ <para>
+This class converts elements of type <literal>readme</literal> to HTML. Such an
+element is (by definition) always the root element of the document. First, the
+HTML header is printed; the <literal>title</literal> attribute of the element
+determines the title of the HTML page. Some aspects of the HTML page can be
+configured by setting certain parameter entities, for example the background
+color, the text color, and link colors. After the header, the
+<literal>body</literal> tag, and the headline have been printed, the contents
+of the page are converted by invoking <literal>to_html</literal> on all
+children of the current node (which is the root node). Then, the footnotes are
+appended to this by telling the global <literal>store</literal> object to print
+the footnotes. Finally, the end tags of the HTML pages are printed.
+</para>
+
+ <para>
+This class is an example of how to access the value of an attribute: The value is
+determined by invoking <literal>self # node # attribute "title"</literal>. As
+this attribute has been declared as CDATA and as being required, the value
+always has the form <literal>Value s</literal> where <literal>s</literal> is the
+string value of the attribute.
+</para>
+
+ <para>
+You can also see how entity contents can be accessed. A parameter entity object
+can be looked up by <literal>self # node # dtd # par_entity "name"</literal>,
+and by invoking <literal>replacement_text</literal> the value of the entity
+is returned after inner parameter and character entities have been
+processed. Note that you must use <literal>gen_entity</literal> instead of
+<literal>par_entity</literal> to access general entities.
+</para>
+
+ <para>
+<programlisting>&readme.code.readme;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Classes <literal>section</literal>, <literal>sect1</literal>,
+<literal>sect2</literal>, and <literal>sect3</literal></title>
+ <para>
+As the conversion process is very similar, the conversion classes of the three
+section levels are derived from the more general <literal>section</literal>
+class. The HTML code of the section levels only differs in the type of the
+headline, and because of this the classes describing the section levels can be
+computed by replacing the class argument <literal>the_tag</literal> of
+<literal>section</literal> by the HTML name of the headline tag.
+</para>
+
+ <para>
+Section elements are converted to HTML by printing a headline and then
+converting the contents of the element recursively. More precisely, the first
+sub-element is always a <literal>title</literal> element, and the other
+elements are the contents of the section. This structure is declared in the
+DTD, and it is guaranteed that the document matches the DTD. Because of this
+the title node can be separated from the rest without any checks.
+</para>
+
+ <para>
+Both the title node, and the body nodes are then converted to HTML by calling
+<literal>to_html</literal> on them.
+</para>
+
+ <para>
+<programlisting>&readme.code.section;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Classes <literal>map_tag</literal>, <literal>p</literal>,
+<literal>em</literal>, <literal>ul</literal>, <literal>li</literal></title>
+ <para>
+Several element types are converted to HTML by simply mapping them to
+corresponding HTML element types. The class <literal>map_tag</literal>
+implements this, and the class argument <literal>the_target_tag</literal>
+determines the tag name to map to. The output consists of the start tag, the
+recursively converted inner elements, and the end tag.
+
+<programlisting>&readme.code.map-tag;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>br</literal></title>
+ <para>
+Elements of type <literal>br</literal> are mapped to the same HTML type. Note
+that HTML forbids the end tag of <literal>br</literal>.
+
+<programlisting>&readme.code.br;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>code</literal></title>
+ <para>
+The <literal>code</literal> type is converted to a <literal>pre</literal>
+section (preformatted text). As the meaning of tabs is unspecified in HTML,
+tabs are expanded to spaces.
+
+<programlisting>&readme.code.code;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>a</literal></title>
+ <para>
+Hyperlinks, expressed by the <literal>a</literal> element type, are converted
+to the HTML <literal>a</literal> type. If the target of the hyperlink is given
+by <literal>href</literal>, the URL of this attribute can be used
+directly. Alternatively, the target can be given by
+<literal>readmeref</literal> in which case the ".html" suffix must be added to
+the file name.
+</para>
+
+ <para>
+Note that within <literal>a</literal> only #PCDATA is allowed, so the contents
+can be converted directly by applying <literal>escape_html</literal> to the
+character data contents.
+
+<programlisting>&readme.code.a;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Class <literal>footnote</literal></title>
+ <para>
+The <literal>footnote</literal> class has two methods:
+<literal>to_html</literal> to convert the footnote reference to HTML, and
+<literal>footnote_to_html</literal> to convert the footnote text itself.
+</para>
+
+ <para>
+The footnote reference is converted to a local hyperlink; more precisely, to
+two anchor tags which are connected with each other. The text anchor points to
+the footnote anchor, and the footnote anchor points to the text anchor.
+</para>
+
+ <para>
+The footnote must be allocated in the <literal>store</literal> object. By
+allocating the footnote, you get the number of the footnote, and the text of
+the footnote is stored until the end of the HTML page is reached when the
+footnotes can be printed. The <literal>to_html</literal> method stores simply
+the object itself, such that the <literal>footnote_to_html</literal> method is
+invoked on the same object that encountered the footnote.
+</para>
+
+ <para>
+The <literal>to_html</literal> method only allocates the footnote and prints the
+reference anchor, but it neither prints nor converts the contents of the
+note. This is deferred until the footnotes actually get printed, i.e. the
+recursive call of <literal>to_html</literal> on the sub nodes is done by
+<literal>footnote_to_html</literal>.
+</para>
+
+ <para>
+Note that this technique does not work if you make another footnote within a
+footnote; the second footnote gets allocated but not printed.
+</para>
+
+ <para>
+<programlisting>&readme.code.footnote;</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>The specification of the document model</title>
+ <para>
+This code sets up the hash table that connects element types with the exemplars
+of the extension classes that convert the elements to HTML.
+
+<programlisting>&readme.code.tag-map;</programlisting>
+</para>
+ </sect2>
+
+<!-- <![RCDATA[&readme.code.to-html;]]> -->
+ </sect1>
+
+ </chapter>
+
+<!-- ********************************************************************** -->
+
+ <chapter>
+ <title>The objects representing the document</title>
+
+ <para>
+<emphasis>This description might be out-of-date. See the module interface files
+for updated information.</emphasis></para>
+
+ <sect1>
+ <title>The <literal>document</literal> class</title>
+ <para>
+<programlisting>
+<![CDATA[
+class [ 'ext ] document :
+ Pxp_types.collect_warnings ->
+ object
+ method init_xml_version : string -> unit
+ method init_root : 'ext node -> unit
+
+ method xml_version : string
+ method xml_standalone : bool
+ method dtd : dtd
+ method root : 'ext node
+
+ method encoding : Pxp_types.rep_encoding
+
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ end
+;;
+]]>
+</programlisting>
+
+The methods beginning with <literal>init_</literal> are only for internal use
+of the parser.
+</para>
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>xml_version</literal>: returns the version string at the beginning of
+the document. For example, "1.0" is returned if the document begins with
+<literal>&lt;?xml version="1.0"?&gt;</literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>xml_standalone</literal>: returns the boolean value of the
+<literal>standalone</literal> declaration in the XML declaration. If the
+<literal>standalone</literal> attribute is missing, <literal>false</literal> is
+returned. </para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>dtd</literal>: returns a reference to the global DTD object.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>root</literal>: returns a reference to the root element.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>encoding</literal>: returns the internal encoding of the
+document. This means that all strings of which the document consists are
+encoded in this character set.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>pinstr</literal>: returns the processing instructions outside the DTD
+and outside the root element. The argument passed to the method names a
+<emphasis>target</emphasis>, and the method returns all instructions with this
+target. The target is the first word inside <literal>&lt;?</literal> and
+<literal>?&gt;</literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>pinstr_names</literal>: returns the names of the processing instructions</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>add_pinstr</literal>: adds another processing instruction. This method
+is used by the parser itself to enter the instructions returned by
+<literal>pinstr</literal>, but you can also enter additional instructions.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>write</literal>: writes the document to the passed stream as XML
+text using the passed (external) encoding. The generated text is always valid
+XML and can be parsed by PXP; however, the text is badly formatted (this is not
+a pretty printer).</para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+
+<!-- ********************************************************************** -->
+
+ <sect1>
+ <title>The class type <literal>node</literal></title>
+ <para>
+
+From <literal>Pxp_document</literal>:
+
+<programlisting>
+type node_type =
+ T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+<replaceable>and some other, reserved types</replaceable>
+;;
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ <anchor id="type-node-general.sig"
+ >(* <link linkend="type-node-general" endterm="type-node-general.title"
+ ></link> *)
+
+ method extension : 'ext
+ method dtd : dtd
+ method parent : 'ext node
+ method root : 'ext node
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node &fun; unit) &fun; unit
+ method iter_nodes_sibl :
+ ('ext node option &fun; 'ext node &fun; 'ext node option &fun; unit) &fun; unit
+ method node_type : node_type
+ method encoding : Pxp_types.rep_encoding
+ method data : string
+ method position : (string * int * int)
+ method comment : string option
+ method pinstr : string &fun; proc_instruction list
+ method pinstr_names : string list
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+ <anchor id="type-node-atts.sig"
+ >(* <link linkend="type-node-atts" endterm="type-node-atts.title"
+ ></link> *)
+
+ method attribute : string &fun; Pxp_types.att_value
+ method required_string_attribute : string &fun; string
+ method optional_string_attribute : string &fun; string option
+ method required_list_attribute : string &fun; string list
+ method optional_list_attribute : string &fun; string list
+ method attribute_names : string list
+ method attribute_type : string &fun; Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string list
+
+ <anchor id="type-node-mods.sig"
+ >(* <link linkend="type-node-mods" endterm="type-node-mods.title"
+ ></link> *)
+
+ method add_node : ?force:bool &fun; 'ext node &fun; unit
+ method add_pinstr : proc_instruction &fun; unit
+ method delete : unit
+ method set_nodes : 'ext node list &fun; unit
+ method quick_set_attributes : (string * Pxp_types.att_value) list &fun; unit
+ method set_comment : string option &fun; unit
+
+ <anchor id="type-node-cloning.sig"
+ >(* <link linkend="type-node-cloning" endterm="type-node-cloning.title"
+ ></link> *)
+
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method create_element :
+ ?position:(string * int * int) &fun;
+ dtd &fun; node_type &fun; (string * string) list &fun;
+ 'ext node
+ method create_data : dtd &fun; string &fun; 'ext node
+ method keep_always_whitespace_mode : unit
+
+ <anchor id="type-node-weird.sig"
+ >(* <link linkend="type-node-weird" endterm="type-node-weird.title"
+ ></link> *)
+
+ method local_validate : ?use_dfa:bool -> unit -> unit
+
+ (* ... Internal methods are undocumented. *)
+
+ end
+;;
+</programlisting>
+
+In the module <literal>Pxp_types</literal> you can find another type
+definition that is important in this context:
+
+<programlisting>
+type Pxp_types.att_value =
+ Value of string
+ | Valuelist of string list
+ | Implied_value
+;;
+</programlisting>
+</para>
+
+ <sect2>
+ <title>The structure of document trees</title>
+
+<para>
+A node represents either an element or a character data section. There are two
+classes implementing the two aspects of nodes: <literal>element_impl</literal>
+and <literal>data_impl</literal>. The latter class does not implement all
+methods because some methods do not make sense for data nodes.
+</para>
+
+<para>
+(Note: PXP also supports a mode which forces that processing instructions and
+comments are represented as nodes of the document tree. However, these nodes
+are instances of <literal>element_impl</literal> with node types
+<literal>T_pinstr</literal> and <literal>T_comment</literal>,
+respectively. This mode must be explicitly configured; the basic representation
+knows only element and data nodes.)
+</para>
+
+ <para>The following figure
+(<link linkend="node-term" endterm="node-term"></link>) shows an example how
+a tree is constructed from element and data nodes. The circular areas
+represent element nodes whereas the ovals denote data nodes. Only elements
+may have subnodes; data nodes are always leaves of the tree. The subnodes
+of an element can be either element or data nodes; in both cases the O'Caml
+objects storing the nodes have the class type <literal>node</literal>.</para>
+
+ <para>Attributes (the clouds in the picture) are not directly
+integrated into the tree; there is always an extra link to the attribute
+list. This is also true for processing instructions (not shown in the
+picture). This means that there are separate access methods for attributes and
+processing instructions.</para>
+
+<figure id="node-term" float="1">
+<title>A tree with element nodes, data nodes, and attributes</title>
+<graphic fileref="pic/node_term" format="GIF"></graphic>
+</figure>
+
+ <para>Only elements, data sections, attributes and processing
+instructions (and comments, if configured) can, directly or indirectly, occur
+in the document tree. It is impossible to add entity references to the tree; if
+the parser finds such a reference, not the reference as such but the referenced
+text (i.e. the tree representing the structured text) is included in the
+tree.</para>
+
+ <para>Note that the parser collapses as much data material into one
+data node as possible such that there are normally never two adjacent data
+nodes. This invariant is enforced even if data material is included by entity
+references or CDATA sections, or if a data sequence is interrupted by
+comments. So <literal>a &amp;amp; b &lt;!-- comment --&gt; c &lt;![CDATA[
+&lt;&gt; d]]&gt;</literal> is represented by only one data node, for
+instance. However, you can create document trees manually which break this
+invariant; it is only the way the parser forms the tree.
+</para>
+
+<figure id="node-general" float="1">
+<title>Nodes are doubly linked trees</title>
+<graphic fileref="pic/node_general" format="GIF"></graphic>
+</figure>
+
+ <para>
+The node tree has links in both directions: Every node has a link to its parent
+(if any), and it has links to the subnodes (see
+figure <link linkend="node-general" endterm="node-general"></link>). Obviously,
+this doubly-linked structure simplifies the navigation in the tree; but has
+also some consequences for the possible operations on trees.</para>
+
+ <para>
+Because every node must have at most <emphasis>one</emphasis> parent node,
+operations are illegal if they violate this condition. The following figure
+(<link linkend="node-add" endterm="node-add"></link>) shows on the left side
+that node <literal>y</literal> is added to <literal>x</literal> as new subnode
+which is allowed because <literal>y</literal> does not have a parent yet. The
+right side of the picture illustrates what would happen if <literal>y</literal>
+had a parent node; this is illegal because <literal>y</literal> would have two
+parents after the operation.</para>
+
+<figure id="node-add" float="1">
+<title>A node can only be added if it is a root</title>
+<graphic fileref="pic/node_add" format="GIF">
+</graphic>
+</figure>
+
+ <para>
+The "delete" operation simply removes the links between two nodes. In the
+picture (<link linkend="node-delete" endterm="node-delete"></link>) the node
+<literal>x</literal> is deleted from the list of subnodes of
+<literal>y</literal>. After that, <literal>x</literal> becomes the root of the
+subtree starting at this node.</para>
+
+<figure id="node-delete" float="1">
+<title>A deleted node becomes the root of the subtree</title>
+<graphic fileref="pic/node_delete" format="GIF"></graphic>
+</figure>
+
+ <para>
+It is also possible to make a clone of a subtree; this is illustrated in
+<link linkend="node-clone" endterm="node-clone"></link>. In this case, the
+clone is a copy of the original subtree except that it is no longer a
+subnode. Because cloning never keeps the connection to the parent, the clones
+are called <emphasis>orphaned</emphasis>.
+</para>
+
+<figure id="node-clone" float="1">
+<title>The clone of a subtree</title>
+<graphic fileref="pic/node_clone" format="GIF"></graphic>
+</figure>
+ </sect2>
+
+ <sect2>
+ <title>The methods of the class type <literal>node</literal></title>
+
+ <anchor id="type-node-general">
+ <formalpara>
+ <title id="type-node-general.title">
+ <link linkend="type-node-general.sig">General observers</link>
+ </title>
+
+ <para>
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>extension</literal>: The reference to the extension object which
+belongs to this node (see ...).</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>dtd</literal>: Returns a reference to the global DTD. All nodes
+of a tree must share the same DTD.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>parent</literal>: Returns the parent node. Raises
+<literal>Not_found</literal> in the case the node does not have a
+parent, i.e. the node is the root.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>root</literal>: Gets the reference to the root node of the tree.
+Every node is contained in a tree with a root, so this method always
+succeeds. Note that this method <emphasis>searches</emphasis> the root,
+which costs time proportional to the length of the path to the root.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>sub_nodes</literal>: Returns references to the children. The returned
+list reflects the order of the children. For data nodes, this method returns
+the empty list.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>iter_nodes f</literal>: Iterates over the children, and calls
+<literal>f</literal> for every child in turn.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>iter_nodes_sibl f</literal>: Iterates over the children, and calls
+<literal>f</literal> for every child in turn. <literal>f</literal> gets as
+arguments the previous node, the current node, and the next node.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>node_type</literal>: Returns either <literal>T_data</literal> which
+means that the node is a data node, or <literal>T_element n</literal>
+which means that the node is an element of type <literal>n</literal>.
+If configured, possible node types are also <literal>T_pinstr t</literal>
+indicating that the node represents a processing instruction with target
+<literal>t</literal>, and <literal>T_comment</literal> in which case the node
+is a comment.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>encoding</literal>: Returns the encoding of the strings.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>data</literal>: Returns the character data of this node and all
+children, concatenated as one string. The encoding of the string is what
+the method <literal>encoding</literal> returns.
+- For data nodes, this method simply returns the represented characters.
+For elements, the meaning of the method has been extended such that it
+returns something useful, i.e. the effectively contained characters, without
+markup. (For <literal>T_pinstr</literal> and <literal>T_comment</literal>
+nodes, the method returns the empty string.)
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>position</literal>: If configured, this method returns the position of
+the element as triple (entity, line, byteposition). For data nodes, the
+position is not stored. If the position is not available the triple
+<literal>"?", 0, 0</literal> is returned.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>comment</literal>: Returns <literal>Some text</literal> for comment
+nodes, and <literal>None</literal> for other nodes. The <literal>text</literal>
+is everything between the comment delimiters <literal>&lt;!--</literal> and
+<literal>--&gt;</literal>.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>pinstr n</literal>: Returns all processing instructions that are
+directly contained in this element and that have a <emphasis>target</emphasis>
+specification of <literal>n</literal>. The target is the first word after
+the <literal>&lt;?</literal>.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>pinstr_names</literal>: Returns the list of all targets of processing
+instructions directly contained in this element.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>write s enc</literal>: Prints the node and all subnodes to the passed
+output stream as valid XML text, using the passed external encoding.
+</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </formalpara>
+
+ <anchor id="type-node-atts">
+ <formalpara>
+ <title id="type-node-atts.title">
+ <link linkend="type-node-atts.sig">Attribute observers</link>
+ </title>
+ <para>
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>attribute n</literal>: Returns the value of the attribute with name
+<literal>n</literal>. This method returns a value for every declared
+attribute, and it raises <literal>Not_found</literal> for any undeclared
+attribute. Note that it even returns a value if the attribute is actually
+missing but is declared as <literal>#IMPLIED</literal> or has a default
+value. - Possible values are:
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>Implied_value</literal>: The attribute has been declared with the
+keyword <literal>#IMPLIED</literal>, and the attribute is missing in the
+attribute list of this element.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>Value s</literal>: The attribute has been declared as type
+<literal>CDATA</literal>, as <literal>ID</literal>, as
+<literal>IDREF</literal>, as <literal>ENTITY</literal>, or as
+<literal>NMTOKEN</literal>, or as enumeration or notation, and one of the two
+conditions holds: (1) The attribute value is present in the attribute list in
+which case the value is returned in the string <literal>s</literal>. (2) The
+attribute has been omitted, and the DTD declared the attribute with a default
+value. The default value is returned in <literal>s</literal>.
+- Summarized, <literal>Value s</literal> is returned for non-implied, non-list
+attribute values.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>Valuelist l</literal>: The attribute has been declared as type
+<literal>IDREFS</literal>, as <literal>ENTITIES</literal>, or
+as <literal>NMTOKENS</literal>, and one of the two conditions holds: (1) The
+attribute value is present in the attribute list in which case the
+space-separated tokens of the value are returned in the string list
+<literal>l</literal>. (2) The attribute has been omitted, and the DTD declared
+the attribute with a default value. The default value is returned in
+<literal>l</literal>.
+- Summarized, <literal>Valuelist l</literal> is returned for all list-type
+attribute values.
+</para>
+ </listitem>
+ </itemizedlist>
+
+Note that before the attribute value is returned, the value is normalized. This
+means that newlines are converted to spaces, and that references to character
+entities (i.e. <literal>&amp;#<replaceable>n</replaceable>;</literal>) and
+general entities
+(i.e. <literal>&amp;<replaceable>name</replaceable>;</literal>) are expanded;
+if necessary, expansion is performed recursively.
+</para>
+
+<para>
+In well-formedness mode, there is no DTD which could declare an
+attribute. Because of this, every occurring attribute is considered as a CDATA
+attribute.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>required_string_attribute n</literal>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is convenient
+if you expect a non-implied and non-list attribute value.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>optional_string_attribute n</literal>: returns the Value attribute
+called n, or the Valuelist attribute as a string where the list elements
+are separated by spaces. If the attribute value is implied, or if the
+attribute does not exist, the method returns None. - This method is
+convenient if you expect a non-list attribute value including the implied
+value.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>required_list_attribute n</literal>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, the method will fail. - This method is
+convenient if you expect a list attribute value.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>optional_list_attribute n</literal>: returns the Valuelist attribute
+called n, or the Value attribute as a list with a single element.
+If the attribute value is implied, or if the
+attribute does not exist, an empty list will be returned. - This method
+is convenient if you expect a list attribute value or the implied value.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>attribute_names</literal>: returns the list of all attribute names of
+this element. As this is a validating parser, this list is equal to the
+list of declared attributes.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>attribute_type n</literal>: returns the type of the attribute called
+<literal>n</literal>. See the module <literal>Pxp_types</literal> for a
+description of the encoding of the types.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>attributes</literal>: returns the list of pairs of names and values
+for all attributes of
+this element.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>id_attribute_name</literal>: returns the name of the attribute that is
+declared with type ID. There is at most one such attribute. The method raises
+<literal>Not_found</literal> if there is no declared ID attribute for the
+element type.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>id_attribute_value</literal>: returns the value of the attribute that
+is declared with type ID. There is at most one such attribute. The method raises
+<literal>Not_found</literal> if there is no declared ID attribute for the
+element type.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>idref_attribute_names</literal>: returns the list of attribute names
+that are declared as IDREF or IDREFS.</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </formalpara>
+
+ <anchor id="type-node-mods">
+ <formalpara>
+ <title id="type-node-mods.title">
+ <link linkend="type-node-mods.sig">Modifying methods</link>
+ </title>
+
+ <para>
+The following methods are only defined for element nodes (more exactly:
+the methods are defined for data nodes, too, but always fail).
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>add_node sn</literal>: Adds sub node <literal>sn</literal> to the list
+of children. This operation is illustrated in the picture
+<link linkend="node-add" endterm="node-add"></link>. This method expects that
+<literal>sn</literal> is a root, and it requires that <literal>sn</literal> and
+the current object share the same DTD.
+</para>
+
+<para>Because <literal>add_node</literal> is the method the parser itself uses
+to add new nodes to the tree, it performs by default some simple validation
+checks: If the content model is a regular expression, it is not allowed to add
+data nodes to this node unless the new nodes consist only of whitespace. In
+this case, the new data nodes are silently dropped (you can change this by
+invoking <literal>keep_always_whitespace_mode</literal>).
+</para>
+
+<para>If the document is flagged as stand-alone, these data nodes only
+containing whitespace are even forbidden if the element declaration is
+contained in an external entity. This case is detected and rejected.</para>
+
+<para>If the content model is <literal>EMPTY</literal>, it is not allowed to
+add any data node unless the data node is empty. In this case, the new data
+node is silently dropped.
+</para>
+
+<para>These checks only apply if there is a DTD. In well-formedness mode, it is
+assumed that every element is declared with content model
+<literal>ANY</literal> which prohibits any validation check. Furthermore, you
+can turn these checks off by passing <literal>~force:true</literal> as first
+argument.</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>add_pinstr pi</literal>: Adds the processing instruction
+<literal>pi</literal> to the list of processing instructions.
+</para>
+ </listitem>
+
+ <listitem>
+ <para>
+<literal>delete</literal>: Deletes this node from the tree. After this
+operation, this node is no longer the child of the former father node; and the
+node loses the connection to the father as well. This operation is illustrated
+by the figure <link linkend="node-delete" endterm="node-delete"></link>.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>set_nodes nl</literal>: Sets the list of children to
+<literal>nl</literal>. It is required that every member of <literal>nl</literal>
+is a root, and that all members and the current object share the same DTD.
+Unlike <literal>add_node</literal>, no validation checks are performed.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>quick_set_attributes atts</literal>: sets the attributes of this
+element to <literal>atts</literal>. It is <emphasis>not</emphasis> checked
+whether <literal>atts</literal> matches the DTD or not; it is up to the
+caller of this method to ensure this. (This method may be useful to transform
+the attribute values, i.e. apply a mapping to every attribute.)
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>set_comment text</literal>: This method is only applicable to
+<literal>T_comment</literal> nodes; it sets the comment text contained by such
+nodes. </para>
+ </listitem>
+ </itemizedlist>
+</para>
+ </formalpara>
+
+ <anchor id="type-node-cloning">
+ <formalpara>
+ <title id="type-node-cloning.title">
+ <link linkend="type-node-cloning.sig">Cloning methods</link>
+ </title>
+
+ <para>
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>orphaned_clone</literal>: Returns a clone of the node and the complete
+tree below this node (deep clone). The clone does not have a parent (i.e. the
+reference to the parent node is <emphasis>not</emphasis> cloned). While
+copying the subtree, strings are skipped; it is likely that the original tree
+and the copy tree share strings. Extension objects are cloned by invoking
+the <literal>clone</literal> method on the original objects; how much of
+the extension objects is cloned depends on the implementation of this method.
+</para>
+ <para>This operation is illustrated by the figure
+<link linkend="node-clone" endterm="node-clone"></link>.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>orphaned_flat_clone</literal>: Returns a clone of the node,
+but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<anchor id="type-node-meth-create-element">
+<literal>create_element dtd nt al</literal>: Returns a flat copy of this node
+(which must be an element) with the following modifications: The DTD is set to
+<literal>dtd</literal>; the node type is set to <literal>nt</literal>, and the
+new attribute list is set to <literal>al</literal> (given as list of
+(name,value) pairs). The copy does not have children nor a parent. It does not
+contain processing instructions. See
+<link linkend="type-node-ex-create-element">the example below</link>.
+</para>
+
+ <para>Note that you can specify the position of the new node
+by the optional argument <literal>~position</literal>.</para>
+ </listitem>
+ <listitem>
+ <para>
+<anchor id="type-node-meth-create-data">
+<literal>create_data dtd cdata</literal>: Returns a flat copy of this node
+(which must be a data node) with the following modifications: The DTD is set to
+<literal>dtd</literal>; the node type is set to <literal>T_data</literal>; the
+attribute list is empty (data nodes never have attributes); the list of
+children and PIs is empty, too (same reason). The new node does not have a
+parent. The value <literal>cdata</literal> is the new character content of the
+node. See
+<link linkend="type-node-ex-create-data">the example below</link>.
+</para>
+ </listitem>
+ <listitem>
+ <para>
+<literal>keep_always_whitespace_mode</literal>: Even data nodes which are
+normally dropped because they only contain ignorable whitespace, can be added to
+this node once this mode is turned on. (This mode is useful to produce
+canonical XML.)
+</para>
+ </listitem>
+ </itemizedlist>
+</para>
+ </formalpara>
+
+ <anchor id="type-node-weird">
+ <formalpara>
+ <title id="type-node-weird.title">
+ <link linkend="type-node-weird.sig">Validating methods</link>
+ </title>
+ <para>
+There is one method which locally validates the node, i.e. checks whether the
+subnodes match the content model of this node.
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>
+<literal>local_validate</literal>: Checks that this node conforms to the
+DTD by comparing the type of the subnodes with the content model for this
+node. (Applications need not call this method unless they add new nodes
+themselves to the tree.)
+</para>
+ </listitem>
+ </itemizedlist>
+</para>
+ </formalpara>
+ </sect2>
+
+ <sect2>
+ <title>The class <literal>element_impl</literal></title>
+ <para>
+This class is an implementation of <literal>node</literal> which
+realizes element nodes:
+
+<programlisting>
+<![CDATA[
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+]]>
+</programlisting>
+
+</para>
+ <formalpara>
+ <title>Constructor</title>
+ <para>
+You can create a new instance by
+
+<programlisting>
+new element_impl <replaceable>extension_object</replaceable>
+</programlisting>
+
+which creates a special form of empty element which already contains a
+reference to the <replaceable>extension_object</replaceable>, but is
+otherwise empty. This special form is called an
+<emphasis>exemplar</emphasis>. The purpose of exemplars is that they serve as
+patterns that can be duplicated and filled with data. The method
+<link linkend="type-node-meth-create-element">
+<literal>create_element</literal></link> is designed to perform this action.
+</para>
+ </formalpara>
+
+ <anchor id="type-node-ex-create-element">
+ <formalpara>
+ <title>Example</title>
+
+ <para>First, create an exemplar by
+
+<programlisting>
+let exemplar_ext = ... in
+let exemplar = new element_impl exemplar_ext in
+</programlisting>
+
+The <literal>exemplar</literal> is not used in node trees, but only as
+a pattern when the element nodes are created:
+
+<programlisting>
+let element = exemplar # <link linkend="type-node-meth-create-element">create_element</link> dtd (T_element name) attlist
+</programlisting>
+
+The <literal>element</literal> is a copy of <literal>exemplar</literal>
+(even the extension <literal>exemplar_ext</literal> has been copied)
+which ensures that <literal>element</literal> and its extension are objects
+of the same class as the exemplars; note that you need not pass a
+class name or other meta information. The copy is initially connected
+with the <literal>dtd</literal>, it gets a node type, and the attribute list
+is filled. The <literal>element</literal> is now fully functional; it can
+be added to another element as child, and it can contain references to
+subnodes.
+</para>
+ </formalpara>
+
+ </sect2>
+
+ <sect2>
+ <title>The class <literal>data_impl</literal></title>
+ <para>
+This class is an implementation of <literal>node</literal> which
+should be used for all character data nodes:
+
+<programlisting>
+<![CDATA[
+class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
+]]>
+</programlisting>
+
+</para>
+
+ <formalpara>
+ <title>Constructor</title>
+ <para>
+You can create a new instance by
+
+<programlisting>
+new data_impl <replaceable>extension_object</replaceable>
+</programlisting>
+
+which creates an empty exemplar node which is connected to
+<replaceable>extension_object</replaceable>. The node does not contain a
+reference to any DTD, and because of this it cannot be added to node trees.
+</para>
+ </formalpara>
+
+ <para>To get a fully working data node, apply the method
+<link linkend="type-node-meth-create-data"><literal>create_data</literal>
+</link> to the exemplar (see example).
+</para>
+
+ <anchor id="type-node-ex-create-data">
+ <formalpara>
+ <title>Example</title>
+
+ <para>First, create an exemplar by
+
+<programlisting>
+let exemplar_ext = ... in
+let exemplar = new data_impl exemplar_ext in
+</programlisting>
+
+The <literal>exemplar</literal> is not used in node trees, but only as
+a pattern when the data nodes are created:
+
+<programlisting>
+let data_node = exemplar # <link
+ linkend="type-node-meth-create-data">create_data</link> dtd "The characters contained in the data node"
+</programlisting>
+
+The <literal>data_node</literal> is a copy of <literal>exemplar</literal>.
+The copy is initially connected
+with the <literal>dtd</literal>, and it is filled with character material.
+The <literal>data_node</literal> is now fully functional; it can
+be added to an element as child.
+</para>
+ </formalpara>
+ </sect2>
+
+ <sect2>
+ <title>The type <literal>spec</literal></title>
+ <para>
+The type <literal>spec</literal> defines a way to handle the details of
+creating nodes from exemplars.
+
+<programlisting><![CDATA[
+type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec
+]]></programlisting>
+
+The two functions <literal>make_spec_from_mapping</literal> and
+<literal>make_spec_from_alist</literal> create <literal>spec</literal>
+values. Both functions are functionally equivalent and the only difference is
+that the first function prefers hashtables and the latter associative lists to
+describe mappings from names to exemplars.
+</para>
+
+<para>
+You can specify exemplars for the various kinds of nodes that need to be
+generated when an XML document is parsed:
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para><literal>~super_root_exemplar</literal>: This exemplar
+is used to create the super root. This special node is only created if the
+corresponding configuration option has been selected; it is the parent node of
+the root node which may be convenient if every working node must have a parent.</para>
+ </listitem>
+ <listitem>
+ <para><literal>~comment_exemplar</literal>: This exemplar is
+used when a comment node must be created. Note that such nodes are only created
+if the corresponding configuration option is "on".
+</para>
+ </listitem>
+ <listitem>
+ <para><literal>~default_pinstr_exemplar</literal>: If a node
+for a processing instruction must be created, and the instruction is not listed
+in the table passed by <literal>~pinstr_mapping</literal> or
+<literal>~pinstr_alist</literal>, this exemplar is used.
+Again the configuration option must be "on" in order to create such nodes at
+all.
+</para>
+ </listitem>
+ <listitem>
+ <para><literal>~pinstr_mapping</literal> or
+<literal>~pinstr_alist</literal>: Map the target names of processing
+instructions to exemplars. These mappings are only used when nodes for
+processing instructions are created.</para>
+ </listitem>
+ <listitem>
+ <para><literal>~data_exemplar</literal>: The exemplar for
+ordinary data nodes.</para>
+ </listitem>
+ <listitem>
+ <para><literal>~default_element_exemplar</literal>: This
+exemplar is used if an element node must be created, but the element type
+cannot be found in the tables <literal>element_mapping</literal> or
+<literal>element_alist</literal>.</para>
+ </listitem>
+ <listitem>
+ <para><literal>~element_mapping</literal> or
+<literal>~element_alist</literal>: Map the element types to exemplars. These
+mappings are used to create element nodes.</para>
+ </listitem>
+ </itemizedlist>
+
+In most cases, you only want to create <literal>spec</literal> values to pass
+them to the parser functions found in <literal>Pxp_yacc</literal>. However, it
+might be useful to apply <literal>spec</literal> values directly.
+</para>
+
+<para>The following functions create various types of nodes by selecting the
+corresponding exemplar from the passed <literal>spec</literal> value, and by
+calling <literal>create_element</literal> or <literal>create_data</literal> on
+the exemplar.
+
+<programlisting><![CDATA[
+val create_data_node :
+ 'ext spec ->
+ dtd ->
+ (* data material: *) string ->
+ 'ext node
+
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* element type: *) string ->
+ (* attributes: *) (string * string) list ->
+ 'ext node
+
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ 'ext node
+
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ (* comment text: *) string ->
+ 'ext node
+
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec ->
+ dtd ->
+ proc_instruction ->
+ 'ext node
+]]></programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Examples</title>
+
+ <formalpara>
+ <title>Building trees.</title>
+
+ <para>Here is the piece of code that creates the tree of
+the figure <link linkend="node-term" endterm="node-term"></link>. The extension
+object and the DTD are beyond the scope of this example.
+
+<programlisting>
+let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar = new data_impl exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;
+</programlisting>
+
+Alternatively, the last block of statements could also be written as:
+
+<programlisting>
+a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];
+</programlisting>
+
+The root of the tree is <literal>a1</literal>, i.e. it is true that
+
+<programlisting>
+x # root == a1
+</programlisting>
+
+for every x from { <literal>a1</literal>, <literal>a2</literal>,
+<literal>b1</literal>, <literal>c1</literal>, <literal>cherries</literal>,
+<literal>orange</literal> }.
+</para>
+ </formalpara>
+ <para>
+Furthermore, the following properties hold:
+
+<programlisting>
+ a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+& orange # data = "An orange"
+& a1 # data = "An orangeCherries"
+
+& a1 # node_type = T_element "a"
+& a2 # node_type = T_element "a"
+& b1 # node_type = T_element "b"
+& c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+& orange # node_type = T_data
+
+& a1 # sub_nodes = [ b1; c1 ]
+& a2 # sub_nodes = [ orange ]
+& b1 # sub_nodes = [ a2; cherries ]
+& c1 # sub_nodes = []
+& cherries # sub_nodes = []
+& orange # sub_nodes = []
+
+& a2 # parent == b1
+& b1 # parent == a1
+& c1 # parent == a1
+& cherries # parent == b1
+& orange # parent == a2
+</programlisting>
+</para>
+ <formalpara>
+ <title>Searching nodes.</title>
+
+ <para>The following function searches all nodes of a tree
+for which a certain condition holds:
+
+<programlisting>
+let rec search p t =
+ if p t then
+ t :: search_list p (t # sub_nodes)
+ else
+ search_list p (t # sub_nodes)
+
+and search_list p l =
+ match l with
+ [] -> []
+ | t :: l' -> (search p t) @ (search_list p l')
+;;
+</programlisting>
+</para>
+ </formalpara>
+
+ <para>For example, if you want to search all elements of a certain
+type <literal>et</literal>, the function <literal>search</literal> can be
+applied as follows:
+
+<programlisting>
+let search_element_type et t =
+ search (fun x -> x # node_type = T_element et) t
+;;
+</programlisting>
+</para>
+
+ <formalpara>
+ <title>Getting attribute values.</title>
+
+ <para>Suppose we have the declaration:
+
+<programlisting><![CDATA[
+<!ATTLIST e a CDATA #REQUIRED
+ b CDATA #IMPLIED
+ c CDATA "12345">]]>
+</programlisting>
+
+In this case, every element <literal>e</literal> must have an attribute
+<literal>a</literal>, otherwise the parser would indicate an error. If
+the O'Caml variable <literal>n</literal> holds the node of the tree
+corresponding to the element, you can get the value of the attribute
+<literal>a</literal> by
+
+<programlisting>
+let value_of_a = n # required_string_attribute "a"
+</programlisting>
+
+which is more or less an abbreviation for
+
+<programlisting><![CDATA[
+let value_of_a =
+ match n # attribute "a" with
+ Value s -> s
+ | _ -> assert false]]>
+</programlisting>
+
+- as the attribute is required, the <literal>attribute</literal> method always
+returns a <literal>Value</literal>.
+</para>
+ </formalpara>
+
+ <para>In contrast to this, the attribute <literal>b</literal> can be
+omitted. In this case, the method <literal>required_string_attribute</literal>
+works only if the attribute is there, and the method will fail if the attribute
+is missing. To get the value, you can apply the method
+<literal>optional_string_attribute</literal>:
+
+<programlisting>
+let value_of_b = n # optional_string_attribute "b"
+</programlisting>
+
+Now, <literal>value_of_b</literal> is of type <literal>string option</literal>,
+and <literal>None</literal> represents the omitted attribute. Alternatively,
+you could also use <literal>attribute</literal>:
+
+<programlisting><![CDATA[
+let value_of_b =
+ match n # attribute "b" with
+ Value s -> Some s
+ | Implied_value -> None
+ | _ -> assert false]]>
+</programlisting>
+</para>
+
+ <para>The attribute <literal>c</literal> behaves much like
+<literal>a</literal>, because it always has a value. If the attribute is
+omitted, the default, here "12345", will be returned instead. Because of this,
+you can again use <literal>required_string_attribute</literal> to get the
+value.
+</para>
+
+ <para>The type <literal>CDATA</literal> is the most general string
+type. The types <literal>NMTOKEN</literal>, <literal>ID</literal>,
+<literal>IDREF</literal>, <literal>ENTITY</literal>, and all enumerators and
+notations are special forms of string types that restrict the possible
+values. From O'Caml, they behave like <literal>CDATA</literal>, i.e. you can
+use the methods <literal>required_string_attribute</literal> and
+<literal>optional_string_attribute</literal>, too.
+</para>
+
+ <para>In contrast to this, the types <literal>NMTOKENS</literal>,
+<literal>IDREFS</literal>, and <literal>ENTITIES</literal> mean lists of
+strings. Suppose we have the declaration:
+
+<programlisting><![CDATA[
+<!ATTLIST f d NMTOKENS #REQUIRED
+ e NMTOKENS #IMPLIED>]]>
+</programlisting>
+
+The type <literal>NMTOKENS</literal> stands for lists of space-separated
+tokens; for example the value <literal>"1 abc 23ef"</literal> means the list
+<literal>["1"; "abc"; "23ef"]</literal>. (Again, <literal>IDREFS</literal>
+and <literal>ENTITIES</literal> have more restricted values.) To get the
+value of attribute <literal>d</literal>, one can use
+
+<programlisting>
+let value_of_d = n # required_list_attribute "d"
+</programlisting>
+
+or
+
+<programlisting><![CDATA[
+let value_of_d =
+ match n # attribute "d" with
+ Valuelist l -> l
+ | _ -> assert false]]>
+</programlisting>
+
+As <literal>d</literal> is required, the attribute cannot be omitted, and
+the <literal>attribute</literal> method always returns a
+<literal>Valuelist</literal>.
+</para>
+
+ <para>For optional attributes like <literal>e</literal>, apply
+
+<programlisting>
+let value_of_e = n # optional_list_attribute "e"
+</programlisting>
+
+or
+
+<programlisting><![CDATA[
+let value_of_e =
+ match n # attribute "e" with
+ Valuelist l -> l
+ | Implied_value -> []
+ | _ -> assert false]]>
+</programlisting>
+
+Here, the case that the attribute is missing counts like the empty list.
+</para>
+
+ </sect2>
+
+
+ <sect2>
+ <title>Iterators</title>
+
+ <para>There are also several iterators in Pxp_document; please see
+the mli file for details. You can find examples for them in the
+"simple_transformation" directory.
+
+<programlisting><![CDATA[
+val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit
+]]></programlisting>
+</para>
+ </sect2>
+
+ </sect1>
+
+<!-- ********************************************************************** -->
+
+ <sect1>
+ <title>The class type <literal>extension</literal></title>
+ <para>
+
+<programlisting>
+<![CDATA[
+class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+]]>
+</programlisting>
+
+This is the type of classes used for node extensions. For every node of the
+document tree, there is not only the <literal>node</literal> object, but also
+an <literal>extension</literal> object. The latter has minimal
+functionality; it has only the necessary methods to be attached to the node
+object containing the details of the node instance. The extension object is
+called extension because its purpose is extensibility.</para>
+
+ <para>For some reasons, it is impossible to derive the
+<literal>node</literal> classes (i.e. <literal>element_impl</literal> and
+<literal>data_impl</literal>) such that the subclasses can be extended by new
+methods. But
+subclassing nodes is a great feature, because it allows the user to provide
+different classes for different types of nodes. The extension objects are a
+workaround that is as powerful as direct subclassing; the cost is
+some notational overhead.
+</para>
+
+<figure id="extension-general" float="1">
+<title>The structure of nodes and extensions</title>
+<graphic fileref="pic/extension_general" format="GIF">
+</graphic>
+</figure>
+
+ <para>The picture shows how the nodes and extensions are linked
+together. Every node has a reference to its extension, and every extension has
+a reference to its node. The methods <literal>extension</literal> and
+<literal>node</literal> follow these references; a typical phrase is
+
+<programlisting>
+self # node # attribute "xy"
+</programlisting>
+
+to get the value of an attribute from a method defined in the extension object;
+or
+
+<programlisting>
+self # node # iter
+ (fun n -> n # extension # my_method ...)
+</programlisting>
+
+to iterate over the subnodes and to call <literal>my_method</literal> of the
+corresponding extension objects.
+</para>
+
+ <para>Note that extension objects do not have references to subnodes
+(or "subextensions") themselves; in order to get one of the children of an
+extension you must first go to the node object, then get the child node, and
+finally reach the extension that is logically the child of the extension you
+started with.</para>
+
+ <sect2>
+ <title>How to define an extension class</title>
+
+ <para>At minimum, you must define the methods
+<literal>clone</literal>, <literal>node</literal>, and
+<literal>set_node</literal> such that your class is compatible with the type
+<literal>extension</literal>. The method <literal>set_node</literal> is called
+during the initialization of the node, or after a node has been cloned; the
+node object invokes <literal>set_node</literal> on the extension object to tell
+it that this node is now the object the extension is linked to. The extension
+must return the node object passed as argument of <literal>set_node</literal>
+when the <literal>node</literal> method is called.</para>
+
+ <para>The <literal>clone</literal> method must return a copy of the
+extension object; at least the object itself must be duplicated, but if
+required, the copy should deeply duplicate all objects and values that are
+referred by the extension, too. Whether this is required, depends on the
+application; <literal>clone</literal> is invoked by the node object when one of
+its cloning methods is called.</para>
+
+ <para>A good starting point for an extension class:
+
+<programlisting>
+<![CDATA[class custom_extension =
+ object (self)
+
+ val mutable node = (None : custom_extension node option)
+
+ method clone = {< >}
+
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+
+ method set_node n =
+ node <- Some n
+
+ end
+]]>
+</programlisting>
+
+This class is compatible with <literal>extension</literal>. The purpose of
+defining such a class is, of course, adding further methods; and you can do it
+without restriction.
+</para>
+
+ <para>Often, you want not only one extension class. In this case,
+it is the simplest way that all your classes (for one kind of document) have
+the same type (with respect to the interface; i.e. it does not matter if your
+classes differ in the defined private methods and instance variables, but
+public methods count). This approach avoids lots of coercions and problems with
+type incompatibilities. It is simple to implement:
+
+<programlisting>
+<![CDATA[class virtual custom_extension =
+ object (self)
+ val mutable node = (None : custom_extension node option)
+
+ method clone = ... (* see above *)
+ method node = ... (* see above *)
+ method set_node n = ... (* see above *)
+
+ method virtual my_method1 : ...
+ method virtual my_method2 : ...
+ ... (* etc. *)
+ end
+
+class custom_extension_kind_A =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end
+
+class custom_extension_kind_B =
+ object (self)
+ inherit custom_extension
+
+ method my_method1 = ...
+ method my_method2 = ...
+ end
+]]>
+</programlisting>
+
+If a class does not need a method (e.g. because it does not make sense, or it
+would violate some important condition), it is possible to define the method
+and to always raise an exception when the method is invoked
+(e.g. <literal>assert false</literal>).
+</para>
+
+ <para>The latter is a strong recommendation: do not try to further
+specialize the types of extension objects. It is difficult, sometimes even
+impossible, and almost never worth-while.</para>
+ </sect2>
+
+ <sect2>
+ <title>How to bind extension classes to element types</title>
+
+ <para>Once you have defined your extension classes, you can bind them
+to element types. The simplest case is that you have only one class and that
+this class is to be always used. The parsing functions in the module
+<literal>Pxp_yacc</literal> take a <literal>spec</literal> argument which
+can be customized. If your single class has the name <literal>c</literal>,
+this argument should be
+
+<programlisting>
+let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist: []
+ ()
+</programlisting>
+
+This means that data nodes will be created from the exemplar passed by
+~data_exemplar and that all element nodes will be made from the exemplar
+specified by ~default_element_exemplar. In ~element_alist, you can
+specify that different exemplars are to be used for different element types; but
+this is an optional feature. If you do not need it, pass the empty list.
+</para>
+
+<para>
+Remember that an exemplar is a (node, extension) pair that serves as pattern
+when new nodes (and the corresponding extension objects) are added to the
+document tree. In this case, the exemplar contains <literal>c</literal> as
+extension, and when nodes are created, the exemplar is cloned, and cloning
+also makes a copy of <literal>c</literal> such that all nodes of the document
+tree will have a copy of <literal>c</literal> as extension.
+</para>
+
+ <para>The <literal>~element_alist</literal> argument can bind
+specific element types to specific exemplars; as exemplars may be instances of
+different classes it is effectively possible to bind element types to
+classes. For example, if the element type "p" is implemented by class "c_p",
+and "q" is realized by "c_q", you can pass the following value:
+
+<programlisting>
+let spec =
+ make_spec_from_alist
+ ~data_exemplar: (new data_impl c)
+ ~default_element_exemplar: (new element_impl c)
+ ~element_alist:
+ [ "p", new element_impl c_p;
+ "q", new element_impl c_q;
+ ]
+ ()
+</programlisting>
+
+The extension object <literal>c</literal> is still used for all data nodes and
+for all other element types.
+</para>
+
+ </sect2>
+
+ </sect1>
+
+<!-- ********************************************************************** -->
+
+ <sect1>
+ <title>Details of the mapping from XML text to the tree representation
+</title>
+
+ <sect2>
+ <title>The representation of character-free elements</title>
+
+ <para>If an element declaration does not allow the element to
+contain character data, the following rules apply.</para>
+
+ <para>If the element must be empty, i.e. it is declared with the
+keyword <literal>EMPTY</literal>, the element instance must be effectively
+empty (it must not even contain whitespace characters). The parser guarantees
+that a declared <literal>EMPTY</literal> element never contains a data
+node, even if the data node represents the empty string.</para>
+
+ <para>If the element declaration only permits other elements to occur
+within that element but not character data, it is still possible to insert
+whitespace characters between the subelements. The parser ignores these
+characters, too, and does not create data nodes for them.</para>
+
+ <formalpara>
+ <title>Example.</title>
+
+ <para>Consider the following element types:
+
+<programlisting><![CDATA[
+<!ELEMENT x ( #PCDATA | z )* >
+<!ELEMENT y ( z )* >
+<!ELEMENT z EMPTY>
+]]></programlisting>
+
+Only <literal>x</literal> may contain character data, the keyword
+<literal>#PCDATA</literal> indicates this. The other types are character-free.
+</para>
+ </formalpara>
+
+ <para>The XML term
+
+<programlisting><![CDATA[
+<x><z/> <z/></x>
+]]></programlisting>
+
+will be internally represented by an element node for <literal>x</literal>
+with three subnodes: the first <literal>z</literal> element, a data node
+containing the space character, and the second <literal>z</literal> element.
+In contrast to this, the term
+
+<programlisting><![CDATA[
+<y><z/> <z/></y>
+]]></programlisting>
+
+is represented by an element node for <literal>y</literal> with only
+<emphasis>two</emphasis> subnodes, the two <literal>z</literal> elements. There
+is no data node for the space character because spaces are ignored in the
+character-free element <literal>y</literal>.
+</para>
+
+ </sect2>
+
+ <sect2>
+ <title>The representation of character data</title>
+
+ <para>The XML specification allows all Unicode characters in XML
+texts. This parser can be configured such that UTF-8 is used to represent the
+characters internally; however, the default character encoding is
+ISO-8859-1. (Currently, no other encodings are possible for the internal string
+representation; the type <literal>Pxp_types.rep_encoding</literal> enumerates
+the possible encodings. In principle, the parser could use any encoding that is
+ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and
+ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal
+encodings (or other multibyte encodings which are not ASCII-compatible) unless
+major parts of the parser are rewritten - unlikely...)
+</para>
+
+<para>
+The internal encoding may be different from the external encoding (specified
+in the XML declaration <literal>&lt;?xml ... encoding="..."?&gt;</literal>); in
+this case the strings are automatically converted to the internal encoding.
+</para>
+
+<para>
+If the internal encoding is ISO-8859-1, it is possible that there are
+characters that cannot be represented. In this case, the parser ignores such
+characters and prints a warning (to the <literal>collect_warnings</literal>
+object that must be passed when the parser is called).
+</para>
+
+ <para>The XML specification allows lines to be separated by single LF
+characters, by CR LF character sequences, or by single CR
+characters. Internally, these separators are always converted to single LF
+characters.</para>
+
+ <para>The parser guarantees that there are never two adjacent data
+nodes; if necessary, data material that would otherwise be represented by
+several nodes is collapsed into one node. Note that you can still create node
+trees with adjacent data nodes; however, the parser does not return such trees.
+</para>
+
+ <para>Note that CDATA sections are not represented specially; such
+sections are added to the current data material that is being collected for the
+next data node.</para>
+ </sect2>
+
+
+ <sect2>
+ <title>The representation of entities within documents</title>
+
+ <para><emphasis>Entities are not represented within
+documents!</emphasis> If the parser finds an entity reference in the document
+content, the reference is immediately expanded, and the parser reads the
+expansion text instead of the reference.
+</para>
+ </sect2>
+
+ <sect2>
+ <title>The representation of attributes</title> <para>As attribute
+values are composed of Unicode characters, too, the same problems with the
+character encoding arise as for character material. Attribute values are
+converted to the internal encoding, too; and if there are characters that
+cannot be represented, these are dropped, and a warning is printed.</para>
+
+ <para>Attribute values are normalized before they are returned by
+methods like <literal>attribute</literal>. First, any remaining entity
+references are expanded; if necessary, expansion is performed recursively.
+Second, newline characters (any of LF, CR LF, or CR characters) are converted
+to single space characters. Note that especially the latter action is
+prescribed by the XML standard (but <literal>&amp;#10;</literal> is not converted
+such that it is still possible to include line feeds into attributes).
+</para>
+ </sect2>
+
+ <sect2>
+ <title>The representation of processing instructions</title>
+<para>Processing instructions are parsed to some extent: The first word of the
+PI is called the target, and it is stored separated from the rest of the PI:
+
+<programlisting><![CDATA[
+<?target rest?>
+]]></programlisting>
+
+The exact location where a PI occurs is not represented (by default). The
+parser puts the PI into the object that represents the embracing construct (an
+element, a DTD, or the whole document); that means you can find out which PIs
+occur in a certain element, in the DTD, or in the whole document, but you
+cannot lookup the exact position within the construct.
+</para>
+
+ <para>If you require the exact location of PIs, it is possible to
+create extra nodes for them. This mode is controlled by the option
+<literal>enable_pinstr_nodes</literal>. The additional nodes have the node type
+<literal>T_pinstr <replaceable>target</replaceable></literal>, and are created
+from special exemplars contained in the <literal>spec</literal> (see
+pxp_document.mli).</para>
+ </sect2>
+
+ <sect2>
+ <title>The representation of comments</title>
+
+<para>Normally, comments are not represented; they are dropped by
+default. However, if you require them, it is possible to create
+<literal>T_comment</literal> nodes for them. This mode can be specified by the
+option <literal>enable_comment_nodes</literal>. Comment nodes are created from
+special exemplars contained in the <literal>spec</literal> (see
+pxp_document.mli). You can access the contents of comments through the
+method <literal>comment</literal>.</para>
+ </sect2>
+
+ <sect2>
+ <title>The attributes <literal>xml:lang</literal> and
+<literal>xml:space</literal></title>
+
+ <para>These attributes are not supported specially; they are handled
+like any other attribute.</para>
+ </sect2>
+
+
+ <sect2>
+ <title>And what about namespaces?</title>
+ <para>Currently, there is no special support for namespaces.
+However, the parser allows the colon to occur in names, so that it is
+possible to implement namespaces on top of the current API.</para>
+
+ <para>Some future release of PXP will support namespaces as built-in
+feature...</para>
+ </sect2>
+
+ </sect1>
+
+ </chapter>
+
+<!-- ********************************************************************** -->
+
+ <chapter>
+ <title>Configuring and calling the parser</title>
+
+<!--
+ <para>
+<emphasis>
+Sorry, this chapter has not yet been written. For an introduction into parser
+configuration, see the previous chapters. As a first approximation, the
+interface definition of Markup_yacc outlines what could go here.
+</emphasis>
+</para>
+-->
+
+<!--
+ <para>
+<programlisting>&markup-yacc.mli;</programlisting>
+</para>
+-->
+
+ <sect1>
+ <title>Overview</title>
+ <para>
+There are the following main functions invoking the parser (in Pxp_yacc):
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para><emphasis>parse_document_entity:</emphasis> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+the body is validated against the DTD. This mode is interesting if you have a
+file
+
+<programlisting><![CDATA[
+<!DOCTYPE root ... [ ... ] > <root> ... </root>
+]]></programlisting>
+
+and you can accept any DTD that is included in the file (e.g. because the file
+is under your control).
+</para>
+ </listitem>
+ <listitem>
+ <para><emphasis>parse_wfdocument_entity:</emphasis> You want to
+parse a complete and closed document consisting of a DTD and the document body;
+but the body is not validated, only checked for well-formedness. This mode is
+preferred if validation costs too much time or if the DTD is missing.
+</para>
+ </listitem>
+ <listitem>
+ <para><emphasis>parse_dtd_entity:</emphasis> You want only to
+parse an entity (file) containing the external subset of a DTD. Sometimes it is
+interesting to read such a DTD, for example to compare it with the DTD included
+in a document, or to apply the next mode:
+</para>
+ </listitem>
+ <listitem>
+ <para><emphasis>parse_content_entity:</emphasis> You want only to
+parse an entity (file) containing a fragment of a document body; this fragment
+is validated against the DTD you pass to the function. Especially, the fragment
+must not have a <literal>&lt;!DOCTYPE&gt;</literal> clause, and must directly
+begin with an element. The element is validated against the DTD. This mode is
+interesting if you want to check documents against a fixed, immutable DTD.
+</para>
+ </listitem>
+ <listitem>
+ <para><emphasis>parse_wfcontent_entity:</emphasis> This function
+also parses a single element without DTD, but does not validate it.</para>
+ </listitem>
+ <listitem>
+ <para><emphasis>extract_dtd_from_document_entity:</emphasis> This
+function extracts the DTD from a closed document consisting of a DTD and a
+document body. Both the internal and the external subsets are extracted.</para>
+ </listitem>
+ </itemizedlist>
+</para>
+
+<para>
+In many cases, <literal>parse_document_entity</literal> is the preferred mode
+to parse a document in a validating way, and
+<literal>parse_wfdocument_entity</literal> is the mode of choice to parse a
+file while only checking for well-formedness.
+</para>
+
+<para>
+There are a number of variations of these modes. One important application of a
+parser is to check documents of an untrusted source against a fixed DTD. One
+solution is to not allow the <literal>&lt;!DOCTYPE&gt;</literal> clause in
+these documents, and treat the document like a fragment (using mode
+<emphasis>parse_content_entity</emphasis>). This is very simple, but
+inflexible; users of such a system cannot even define additional entities to
+abbreviate frequent phrases of their text.
+</para>
+
+<para>
+It may be necessary to have a more intelligent checker. For example, it is also
+possible to parse the document to check fully, i.e. with DTD, and to compare
+this DTD with the prescribed one. In order to fully parse the document, mode
+<emphasis>parse_document_entity</emphasis> is applied, and to get the DTD to
+compare with, mode <emphasis>parse_dtd_entity</emphasis> can be used.
+</para>
+
+<para>
+There is another very important configurable aspect of the parser: the
+so-called resolver. The task of the resolver is to locate the contents of an
+(external) entity for a given entity name, and to make the contents accessible
+as a character stream. (Furthermore, it also normalizes the character set;
+but this is a detail we can ignore here.) Consider you have a file called
+<literal>"main.xml"</literal> containing
+
+<programlisting><![CDATA[
+<!ENTITY % sub SYSTEM "sub/sub.xml">
+%sub;
+]]></programlisting>
+
+and a file stored in the subdirectory <literal>"sub"</literal> with name
+<literal>"sub.xml"</literal> containing
+
+<programlisting><![CDATA[
+<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
+%subsub;
+]]></programlisting>
+
+and a file stored in the subdirectory <literal>"subsub"</literal> of
+<literal>"sub"</literal> with name <literal>"subsub.xml"</literal> (the
+contents of this file do not matter). Here, the resolver must track that
+the second entity <literal>subsub</literal> is located in the directory
+<literal>"sub/subsub"</literal>, i.e. the difficulty is to interpret the
+system (file) names of entities relative to the entities containing them,
+even if the entities are deeply nested.
+</para>
+
+<para>
+There is not a fixed resolver already doing everything right - resolving entity
+names is a task that highly depends on the environment. The XML specification
+only demands that <literal>SYSTEM</literal> entities are interpreted like URLs
+(which is not very precise, as there are lots of URL schemes in use), hoping
+that this helps overcoming the local peculiarities of the environment; the idea
+is that if you do not know your environment you can refer to other entities by
+denoting URLs for them. I think that this interpretation of
+<literal>SYSTEM</literal> names may have some applications in the internet, but
+it is not the first choice in general. Because of this, the resolver is a
+separate module of the parser that can be exchanged by another one if
+necessary; more precisely, the parser already defines several resolvers.
+</para>
+
+<para>
+The following resolvers do already exist:
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>Resolvers reading from arbitrary input channels. These
+can be configured such that a certain ID is associated with the channel; in
+this case inner references to external entities can be resolved. There is also
+a special resolver that interprets SYSTEM IDs as URLs; this resolver can
+process relative SYSTEM names and determine the corresponding absolute URL.
+</para>
+ </listitem>
+ <listitem>
+ <para>A resolver that reads always from a given O'Caml
+string. This resolver is not able to resolve further names unless the string is
+not associated with any name, i.e. if the document contained in the string
+refers to an external entity, this reference cannot be followed in this
+case.</para>
+ </listitem>
+ <listitem>
+ <para>A resolver for file names. The <literal>SYSTEM</literal>
+name is interpreted as file URL with the slash "/" as separator for
+directories. - This resolver is derived from the generic URL resolver.</para>
+ </listitem>
+ </itemizedlist>
+
+The interface a resolver must have is documented, so it is possible to write
+your own resolver. For example, you could connect the parser with an HTTP
+client, and resolve URLs of the HTTP namespace. The resolver classes allow
+several independent resolvers to be combined into one more powerful resolver;
+thus it is possible to combine a self-written resolver with the already
+existing resolvers.
+</para>
+
+<para>
+Note that the existing resolvers only interpret <literal>SYSTEM</literal>
+names, not <literal>PUBLIC</literal> names. If it helps you, it is possible to
+define resolvers for <literal>PUBLIC</literal> names, too; for example, such a
+resolver could look up the public name in a hash table, and map it to a system
+name which is passed over to the existing resolver for system names. It is
+relatively simple to provide such a resolver.
+</para>
+
+
+ </sect1>
+
+ <sect1>
+ <title>Resolvers and sources</title>
+
+ <sect2>
+ <title>Using the built-in resolvers (called sources)</title>
+
+ <para>The type <literal>source</literal> enumerates the two
+possibilities where the document to parse comes from.
+
+<programlisting>
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+</programlisting>
+
+You normally need not worry about this type as there are convenience
+functions that create <literal>source</literal> values:
+
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para><literal>from_file s</literal>: The document is read from
+file <literal>s</literal>; you may specify absolute or relative path names.
+The file name must be encoded as UTF-8 string.
+</para>
+
+<para>There is an optional argument <literal>~system_encoding</literal>
+specifying the character encoding which is used for the names of the file
+system. For example, if this encoding is ISO-8859-1 and <literal>s</literal> is
+also an ISO-8859-1 string, you can form the source:
+
+<programlisting><![CDATA[
+let s_utf8 = recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
+from_file ~system_encoding:`Enc_iso88591 s_utf8
+]]></programlisting>
+</para>
+
+<para>
+This <literal>source</literal> has the advantage that
+it is able to resolve inner external entities; i.e. if your document includes
+data from another file (using the <literal>SYSTEM</literal> attribute), this
+mode will find that file. However, this mode cannot resolve
+<literal>PUBLIC</literal> identifiers nor <literal>SYSTEM</literal> identifiers
+other than "file:".
+</para>
+ </listitem>
+ <listitem>
+ <para><literal>from_channel ch</literal>: The document is read
+from the channel <literal>ch</literal>. In general, this source also supports
+file URLs found in the document; however, by default only absolute URLs are
+understood. It is possible to associate an ID with the channel such that the
+resolver knows how to interpret relative URLs:
+
+<programlisting>
+from_channel ~id:(System "file:///dir/dir1/") ch
+</programlisting>
+
+There is also the ~system_encoding argument specifying how file names are
+encoded. - The example from above can also be written (but it is no
+longer possible to interpret relative URLs because there is no ~id argument,
+and computing this argument is relatively complicated because it must
+be a valid URL):
+
+<programlisting>
+let ch = open_in s in
+let src = from_channel ~system_encoding:`Enc_iso88591 ch in
+...;
+close_in ch
+</programlisting>
+</para>
+ </listitem>
+ <listitem>
+ <para><literal>from_string s</literal>: The string
+<literal>s</literal> is the document to parse. This mode is not able to
+interpret file names of <literal>SYSTEM</literal> clauses, nor can it look up
+<literal>PUBLIC</literal> identifiers. </para>
+
+ <para>Normally, the encoding of the string is detected as usual
+by analyzing the XML declaration, if any. However, it is also possible to
+specify the encoding directly:
+
+<programlisting>
+let src = from_string ~fixenc:`Enc_iso88592 s
+</programlisting>
+</para>
+ </listitem>
+ <listitem>
+ <para><literal>ExtID (id, r)</literal>: The document to parse
+is denoted by the identifier <literal>id</literal> (either a
+<literal>SYSTEM</literal> or <literal>PUBLIC</literal> clause), and this
+identifier is interpreted by the resolver <literal>r</literal>. Use this mode
+if you have written your own resolver.</para>
+ <para>Which character sets are possible depends on the passed
+resolver <literal>r</literal>.</para>
+ </listitem>
+ <listitem>
+ <para><literal>Entity (get_entity, r)</literal>: The document
+to parse is returned by the function invocation <literal>get_entity
+dtd</literal>, where <literal>dtd</literal> is the DTD object to use (it may be
+empty). Inner external references occurring in this entity are resolved using
+the resolver <literal>r</literal>.</para>
+ <para>Which character sets are possible depends on the passed
+resolver <literal>r</literal>.</para>
+ </listitem>
+ </itemizedlist></para>
+ </sect2>
+
+
+ <sect2>
+ <title>The resolver API</title>
+
+ <para>A resolver is an object that can be opened like a file, but you
+do not pass the file name to the resolver, but the XML identifier of the entity
+to read from (either a <literal>SYSTEM</literal> or <literal>PUBLIC</literal>
+clause). When opened, the resolver must return the
+<literal>Lexing.lexbuf</literal> that reads the characters. The resolver can
+be closed, and it can be cloned. Furthermore, it is possible to tell the
+resolver which character set it should assume. - The following is taken from Pxp_reader:
+
+<programlisting><![CDATA[
+exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+ object
+ method init_rep_encoding : rep_encoding -> unit
+ method init_warner : collect_warnings -> unit
+ method rep_encoding : rep_encoding
+ method open_in : ext_id -> Lexing.lexbuf
+ method close_in : unit
+ method change_encoding : string -> unit
+ method clone : resolver
+ method close_all : unit
+ end
+]]></programlisting>
+
+The resolver object must work as follows:</para>
+
+<para>
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para>When the parser is called, it tells the resolver the
+warner object and the internal encoding by invoking
+<literal>init_warner</literal> and <literal>init_rep_encoding</literal>. The
+resolver should store these values. The method <literal>rep_encoding</literal>
+should return the internal encoding.
+</para>
+ </listitem>
+ <listitem>
+ <para>If the parser wants to read from the resolver, it invokes
+the method <literal>open_in</literal>. Either the resolver succeeds, in which
+case the <literal>Lexing.lexbuf</literal> reading from the file or stream must
+be returned, or opening fails. In the latter case the method implementation
+should raise an exception (see below).</para>
+ </listitem>
+ <listitem>
+ <para>If the parser finishes reading, it calls the
+<literal>close_in</literal> method.</para>
+ </listitem>
+ <listitem>
+ <para>If the parser finds a reference to another external
+entity in the input stream, it calls <literal>clone</literal> to get a second
+resolver which must be initially closed (not yet connected with an input
+stream). The parser then invokes <literal>open_in</literal> and the other
+methods as described.</para>
+ </listitem>
+ <listitem>
+ <para>If you already know the character set of the input
+stream, you should recode it to the internal encoding, and define the method
+<literal>change_encoding</literal> as an empty method.</para>
+ </listitem>
+ <listitem>
+ <para>If you want to support multiple external character sets,
+the object must follow a much more complicated protocol. Directly after
+<literal>open_in</literal> has been called, the resolver must return a lexical
+buffer that only reads one byte at a time. This is only possible if you create
+the lexical buffer with <literal>Lexing.from_function</literal>; the function
+must then always return 1 if the EOF is not yet reached, and 0 if EOF is
+reached. If the parser has read the first line of the document, it will invoke
+<literal>change_encoding</literal> to tell the resolver which character set to
+assume. From this moment, the object can return more than one byte at once. The
+argument of <literal>change_encoding</literal> is either the parameter of the
+"encoding" attribute of the XML declaration, or the empty string if there is
+not any XML declaration or if the declaration does not contain an encoding
+attribute. </para>
+
+ <para>At the beginning the resolver must only return one
+character every time something is read from the lexical buffer. The reason for
+this is that you otherwise would not exactly know at which position in the
+input stream the character set changes.</para>
+
+ <para>If you want automatic recognition of the character set,
+it is up to the resolver object to implement this.</para>
+ </listitem>
+
+ <listitem><para>If an error occurs, the parser calls the method
+<literal>close_all</literal> for the top-level resolver; this method should
+close itself (if not already done) and all clones.</para>
+ </listitem>
+ </itemizedlist>
+</para>
+ <formalpara><title>Exceptions</title>
+ <para>
+It is possible to chain resolvers such that when the first resolver is not able
+to open the entity, the other resolvers of the chain are tried in turn. The
+method <literal>open_in</literal> should raise the exception
+<literal>Not_competent</literal> to indicate that the next resolver should try
+to open the entity. If the resolver is able to handle the ID, but some other
+error occurs, the exception <literal>Not_resolvable</literal> should be raised
+to force the chain to break.
+ </para>
+ </formalpara>
+
+ <para>Example: How to define a resolver that is equivalent to
+from_string: ...</para>
+
+ </sect2>
+
+ <sect2>
+ <title>Predefined resolver components</title>
+ <para>
+There are some classes in Pxp_reader that define common resolver behaviour.
+
+<programlisting><![CDATA[
+class resolve_read_this_channel :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ ?auto_close:bool ->
+ in_channel ->
+ resolver
+]]></programlisting>
+
+Reads from the passed channel (it may be even a pipe). If the
+<literal>~id</literal> argument is passed to the object, the created resolver
+accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has
+been cloned, it does not accept any ID. This means that this resolver cannot
+handle inner references to external entities. Note that you can combine this
+resolver with another resolver that can handle inner references (such as
+resolve_as_file); see class 'combine' below. - If you pass the
+<literal>~fixenc</literal> argument, the encoding of the channel is set to the
+passed value, regardless of any auto-recognition or any XML declaration. - If
+<literal>~auto_close = true</literal> (which is the default), the channel is
+closed after use. If <literal>~auto_close = false</literal>, the channel is
+left open.
+ </para>
+
+ <para>
+<programlisting><![CDATA[
+class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver
+]]></programlisting>
+
+This resolver calls the function <literal>~channel_of_id</literal> to open a
+new channel for the passed <literal>ext_id</literal>. This function must either
+return the channel and the encoding, or it must fail with Not_competent. The
+function must return <literal>None</literal> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<literal>Some e</literal> if it is already known that the encoding of the
+channel is <literal>e</literal>. If <literal>~auto_close = true</literal>
+(which is the default), the channel is closed after use. If
+<literal>~auto_close = false</literal>, the channel is left open.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver
+]]></programlisting>
+
+When this resolver gets an ID to read from, it calls the function
+<literal>~url_of_id</literal> to get the corresponding URL. This URL may be a
+relative URL; however, a URL scheme must be used which contains a path. The
+resolver converts the URL to an absolute URL if necessary. The second
+function, <literal>~channel_of_url</literal>, is fed with the absolute URL as
+input. This function opens the resource to read from, and returns the channel
+and the encoding of the resource.
+</para>
+<para>
+Both functions, <literal>~url_of_id</literal> and
+<literal>~channel_of_url</literal>, can raise Not_competent to indicate that
+the object is not able to read from the specified resource. However, there is a
+difference: A Not_competent from <literal>~url_of_id</literal> is left as it
+is, but a Not_competent from <literal>~channel_of_url</literal> is converted to
+Not_resolvable. So only <literal>~url_of_id</literal> decides which URLs are
+accepted by the resolver and which not.
+</para>
+<para>
+The function <literal>~channel_of_url</literal> must return
+<literal>None</literal> as encoding if the default mechanism to recognize the
+encoding should be used. It must return <literal>Some e</literal> if it is
+already known that the encoding of the channel is <literal>e</literal>.
+</para>
+<para>
+If <literal>~auto_close = true</literal> (which is the default), the channel is
+closed after use. If <literal>~auto_close = false</literal>, the channel is
+left open.
+</para>
+<para>
+Objects of this class contain a base URL relative to which relative URLs are
+interpreted. When creating a new object, you can specify the base URL by
+passing it as <literal>~base_url</literal> argument. When an existing object is
+cloned, the base URL of the clone is the URL of the original object. - Note
+that the term "base URL" has a strict definition in RFC 1808.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+class resolve_read_this_string :
+ ?id:ext_id ->
+ ?fixenc:encoding ->
+ string ->
+ resolver
+]]></programlisting>
+
+Reads from the passed string. If the <literal>~id</literal> argument is passed
+to the object, the created resolver accepts only this ID. Otherwise all IDs are
+accepted. - Once the resolver has been cloned, it does not accept any ID. This
+means that this resolver cannot handle inner references to external
+entities. Note that you can combine this resolver with another resolver that
+can handle inner references (such as resolve_as_file); see class 'combine'
+below. - If you pass the <literal>~fixenc</literal> argument, the encoding of
+the string is set to the passed value, regardless of any auto-recognition or
+any XML declaration.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) ->
+ resolver
+]]></programlisting>
+
+This resolver calls the function <literal>~string_of_id</literal> to get the
+string for the passed <literal>ext_id</literal>. This function must either
+return the string and the encoding, or it must fail with Not_competent. The
+function must return <literal>None</literal> as encoding if the default
+mechanism to recognize the encoding should be used. It must return
+<literal>Some e</literal> if it is already known that the encoding of the
+string is <literal>e</literal>.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver
+]]></programlisting>
+Reads from the local file system. Every file name is interpreted as
+file name of the local file system, and the referred file is read.
+</para>
+<para>
+The full form of a file URL is: file://host/path, where
+'host' specifies the host system where the file identified by 'path'
+resides. host = "" or host = "localhost" are accepted; other values
+will raise Not_competent. The standard for file URLs is
+defined in RFC 1738.
+</para>
+<para>
+Option <literal>~file_prefix</literal>: Specifies how the "file:" prefix of
+file names is handled:
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para><literal>`Not_recognized:</literal> The prefix is not
+recognized.</para>
+ </listitem>
+ <listitem>
+ <para><literal>`Allowed:</literal> The prefix is allowed but
+not required (the default).</para>
+ </listitem>
+ <listitem>
+ <para><literal>`Required:</literal> The prefix is
+required.</para>
+ </listitem>
+ </itemizedlist>
+</para>
+<para>
+Option <literal>~host_prefix:</literal> Specifies how the "//host" phrase of
+file names is handled:
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem>
+ <para><literal>`Not_recognized:</literal> The prefix is not
+recognized.</para>
+ </listitem>
+ <listitem>
+ <para><literal>`Allowed:</literal> The prefix is allowed but
+not required (the default).</para>
+ </listitem>
+ <listitem>
+ <para><literal>`Required:</literal> The prefix is
+required.</para>
+ </listitem>
+ </itemizedlist>
+</para>
+<para>
+Option <literal>~system_encoding:</literal> Specifies the encoding of file
+names of the local file system. Default: UTF-8.
+</para>
+<para>
+Options <literal>~url_of_id</literal>, <literal>~channel_of_url</literal>: Not
+for the casual user!
+</para>
+
+ <para>
+<programlisting><![CDATA[
+class combine :
+ ?prefer:resolver ->
+ resolver list ->
+ resolver
+]]></programlisting>
+
+Combines several resolver objects. If a concrete entity with an
+<literal>ext_id</literal> is to be opened, the combined resolver tries the
+contained resolvers in turn until a resolver accepts opening the entity
+(i.e. it does not raise Not_competent on open_in).
+</para>
+<para>
+Clones: If the 'clone' method is invoked before 'open_in', all contained
+resolvers are cloned separately and again combined. If the 'clone' method is
+invoked after 'open_in' (i.e. while the resolver is open), additionally the
+clone of the active resolver is flagged as being preferred, i.e. it is tried
+first.
+</para>
+
+ </sect2>
+ </sect1>
+
+ <sect1>
+ <title>The DTD classes</title> <para><emphasis>Sorry, not yet
+written. Perhaps the interface definition of Pxp_dtd expresses the same:
+</emphasis></para>
+ <para>
+<programlisting>&markup-dtd1.mli;&markup-dtd2.mli;</programlisting>
+</para>
+ </sect1>
+
+ <sect1>
+ <title>Invoking the parser</title>
+
+ <para>Here is a description of Pxp_yacc.</para>
+
+ <sect2>
+ <title>Defaults</title>
+ <para>The following defaults are available:
+
+<programlisting>
+val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec
+</programlisting>
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Parsing functions</title>
+ <para>In the following, the term "closed document" refers to
+an XML structure like
+
+<programlisting>
+&lt;!DOCTYPE ... [ <replaceable>declarations</replaceable> ] &gt;
+&lt;<replaceable>root</replaceable>&gt;
+...
+&lt;/<replaceable>root</replaceable>&gt;
+</programlisting>
+
+The term "fragment" refers to an XML structure like
+
+<programlisting>
+&lt;<replaceable>root</replaceable>&gt;
+...
+&lt;/<replaceable>root</replaceable>&gt;
+</programlisting>
+
+i.e. only to one isolated element instance.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val parse_dtd_entity : config -> source -> dtd
+]]></programlisting>
+
+Parses the declarations which are contained in the entity, and returns them as
+<literal>dtd</literal> object.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val extract_dtd_from_document_entity : config -> source -> dtd
+]]></programlisting>
+
+Extracts the DTD from a closed document. Both the internal and the external
+subsets are extracted and combined to one <literal>dtd</literal> object. This
+function does not parse the whole document, but only the parts that are
+necessary to extract the DTD.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+]]></programlisting>
+
+Parses a closed document and validates it against the DTD that is contained in
+the document (internal and external subsets). The option
+<literal>~transform_dtd</literal> can be used to transform the DTD in the
+document, and to use the transformed DTD for validation. If
+<literal>~id_index</literal> is specified, an index of all ID attributes is
+created.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val parse_wfdocument_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext document
+]]></programlisting>
+
+Parses a closed document, but checks it only for well-formedness.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val parse_content_entity :
+ ?id_index:('ext index) ->
+ config ->
+ source ->
+ dtd ->
+ 'ext spec ->
+ 'ext node
+]]></programlisting>
+
+Parses a fragment, and validates the element.
+</para>
+
+ <para>
+<programlisting><![CDATA[
+val parse_wfcontent_entity :
+ config ->
+ source ->
+ 'ext spec ->
+ 'ext node
+]]></programlisting>
+
+Parses a fragment, but checks it only for well-formedness.
+</para>
+ </sect2>
+
+ <sect2>
+ <title>Configuration options</title>
+ <para>
+
+<programlisting><![CDATA[
+type config =
+ { warner : collect_warnings;
+ errors_with_line_numbers : bool;
+ enable_pinstr_nodes : bool;
+ enable_super_root_node : bool;
+ enable_comment_nodes : bool;
+ encoding : rep_encoding;
+ recognize_standalone_declaration : bool;
+ store_element_positions : bool;
+ idref_pass : bool;
+ validate_by_dfa : bool;
+ accept_only_deterministic_models : bool;
+ ...
+ }
+]]></programlisting>
+
+<itemizedlist mark="bullet" spacing="compact">
+ <listitem><para><literal>warner:</literal> The parser prints
+warnings by invoking the method <literal>warn</literal> for this warner
+object. (Default: all warnings are dropped)</para>
+ </listitem>
+ <listitem><para><literal>errors_with_line_numbers:</literal> If
+true, errors contain line numbers; if false, errors contain only byte
+positions. The latter mode is faster. (Default: true)</para>
+ </listitem>
+ <listitem><para><literal>enable_pinstr_nodes:</literal> If true,
+the parser creates extra nodes for processing instructions. If false,
+processing instructions are simply added to the element or document surrounding
+the instructions. (Default: false)</para>
+ </listitem>
+ <listitem><para><literal>enable_super_root_node:</literal> If
+true, the parser creates an extra node which is the parent of the root of the
+document tree. This node is called super root; it is an element with type
+<literal>T_super_root</literal>. - If there are processing instructions outside
+the root element and outside the DTD, they are added to the super root instead
+of the document. - If false, the super root node is not created. (Default:
+false)</para>
+ </listitem>
+ <listitem><para><literal>enable_comment_nodes:</literal> If true,
+the parser creates nodes for comments with type <literal>T_comment</literal>;
+if false, such nodes are not created. (Default: false)</para>
+ </listitem>
+ <listitem><para><literal>encoding:</literal> Specifies the
+internal encoding of the parser. Most strings are then represented according to
+this encoding; however there are some exceptions (especially
+<literal>ext_id</literal> values which are always UTF-8 encoded).
+(Default: `Enc_iso88591)</para>
+ </listitem>
+ <listitem><para><literal>
+recognize_standalone_declaration:</literal> If true and if the parser is
+validating, the <literal>standalone="yes"</literal> declaration forces that it
+is checked whether the document is a standalone document. - If false, or if the
+parser is in well-formedness mode, such declarations are ignored.
+(Default: true)
+</para>
+ </listitem>
+ <listitem><para><literal>store_element_positions:</literal> If
+true, for every non-data node the source position is stored. If false, the
+position information is lost. If available, you can get the positions of nodes
+by invoking the <literal>position</literal> method.
+(Default: true)</para>
+ </listitem>
+ <listitem><para><literal>idref_pass:</literal> If true and if
+there is an ID index, the parser checks whether every IDREF or IDREFS attribute
+refers to an existing node; this requires that the parser traverses the whole
+document tree. If false, this check is left out. (Default: false)</para>
+ </listitem>
+ <listitem><para><literal>validate_by_dfa:</literal> If true and if
+the content model for an element type is deterministic, a deterministic finite
+automaton is used to validate whether the element contents match the content
+model of the type. If false, or if a DFA is not available, a backtracking
+algorithm is used for validation. (Default: true)
+</para>
+ </listitem>
+ <listitem><para><literal>
+accept_only_deterministic_models:</literal> If true, only deterministic content
+models are accepted; if false, any syntactically correct content models can be
+processed. (Default: true)</para>
+ </listitem>
+ </itemizedlist></para>
+ </sect2>
+
+ <sect2>
+ <title>Which configuration should I use?</title>
+ <para>First, I recommend varying the default configuration instead of
+creating a new configuration record. For instance, to set
+<literal>idref_pass</literal> to <literal>true</literal>, change the default
+as in:
+<programlisting>
+let config = { default_config with idref_pass = true }
+</programlisting>
+The background is that I can add more options to the record in future versions
+of the parser without breaking your programs.</para>
+
+ <formalpara>
+ <title>Do I need extra nodes for processing instructions?</title>
+<para>By default, such nodes are not created. This does not mean that the
+processing instructions are lost; however, you cannot find out the exact
+location where they occur. For example, the following XML text
+
+<programlisting><![CDATA[
+<x><?pi1?><y/><?pi2?></x>
+]]></programlisting>
+
+will normally create one element node for <literal>x</literal> containing
+<emphasis>one</emphasis> subnode for <literal>y</literal>. The processing
+instructions are attached to <literal>x</literal> in a separate hash table; you
+can access them using <literal>x # pinstr "pi1"</literal> and <literal>x #
+pinstr "pi2"</literal>, respectively. However, the information about where the
+instructions occur within <literal>x</literal> is lost.
+</para>
+ </formalpara>
+
+ <para>If the option <literal>enable_pinstr_nodes</literal> is
+turned on, the parser creates extra nodes <literal>pi1</literal> and
+<literal>pi2</literal> such that the subnodes of <literal>x</literal> are now:
+
+<programlisting><![CDATA[
+x # sub_nodes = [ pi1; y; pi2 ]
+]]></programlisting>
+
+The extra nodes contain the processing instructions in the usual way, i.e. you
+can access them using <literal>pi1 # pinstr "pi1"</literal> and <literal>pi2 #
+pinstr "pi2"</literal>, respectively.
+</para>
+
+ <para>Note that you will need an exemplar for the PI nodes (see
+<literal>make_spec_from_alist</literal>).</para>
+
+ <formalpara>
+ <title>Do I need a super root node?</title>
+ <para>By default, there is no super root node. The
+<literal>document</literal> object refers directly to the node representing the
+root element of the document, i.e.
+
+<programlisting><![CDATA[
+doc # root = r
+]]></programlisting>
+
+if <literal>r</literal> is the root node. This is sometimes inconvenient: (1)
+Some algorithms become simpler if every node has a parent, even the root
+node. (2) Some standards such as XPath call the "root node" the node whose
+child represents the root of the document. (3) The super root node can serve
+as a container for processing instructions outside the root element. Because of
+these reasons, it is possible to create an extra super root node, whose child
+is the root node:
+
+<programlisting><![CDATA[
+doc # root = sr &&
+sr # sub_nodes = [ r ]
+]]></programlisting>
+
+When extra nodes are also created for processing instructions, these nodes can
+be added to the super root node if they occur outside the root element (reason
+(3)), and the order reflects the order in the source text.</para>
+ </formalpara>
+
+ <para>Note that you will need an exemplar for the super root node
+(see <literal>make_spec_from_alist</literal>).</para>
+
+ <formalpara>
+ <title>What is the effect of the UTF-8 encoding?</title>
+ <para>By default, the parser represents strings (with few
+exceptions) as ISO-8859-1 strings. These are well-known, and there are tools
+and fonts for this encoding.</para>
+ </formalpara>
+ <para>However, internationalization may require that you switch over
+to UTF-8 encoding. In most environments, the immediate effect will be that you
+cannot read strings with character codes >= 160 any longer; your terminal will
+only show funny glyph combinations. It is strongly recommended to install
+Unicode fonts (<ulink URL="http://czyborra.com/unifont/">GNU Unifont</ulink>,
+<ulink URL="http://www.cl.cam.ac.uk/~mgk25/download/ucs-fonts.tar.gz">
+Markus Kuhn's fonts</ulink>) and <ulink
+URL="http://myweb.clark.net/pub/dickey/xterm/xterm.html">terminal emulators
+that can handle UTF-8 byte sequences</ulink>. Furthermore, a Unicode editor may
+be helpful (such as <ulink
+URL="ftp://metalab.unc.edu/pub/Linux/apps/editors/X/">Yudit</ulink>). There is
+also a <ulink URL="http://www.cl.cam.ac.uk/~mgk25/unicode.html">FAQ</ulink> by
+Markus Kuhn.
+</para>
+ <para>By setting <literal>encoding</literal> to
+<literal>`Enc_utf8</literal> all strings originating from the parsed XML
+document are represented as UTF-8 strings. This includes not only character
+data and attribute values but also element names, attribute names and so on, as
+it is possible to use any Unicode letter to form such names. Strictly
+speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it
+will have difficulties when validating documents containing
+non-ISO-8859-1-names.
+</para>
+
+ <para>This mode does not have any impact on the external
+representation of documents. The character set assumed when reading a document
+is set in the XML declaration, and the character set used when writing a
+document must be passed to the <literal>write</literal> method.
+</para>
+
+ <formalpara>
+ <title>How do I check that nodes exist which are referred to by IDREF attributes?</title>
+ <para>First, you must create an index of all occurring ID
+attributes:
+
+<programlisting><![CDATA[
+let index = new hash_index
+]]></programlisting>
+
+This index must be passed to the parsing function:
+
+<programlisting><![CDATA[
+parse_document_entity
+ ~id_index:(index :> index)
+ config source spec
+]]></programlisting>
+
+Next, you must turn on the <literal>idref_pass</literal> mode:
+
+<programlisting><![CDATA[
+let config = { default_config with idref_pass = true }
+]]></programlisting>
+
+Note that now the whole document tree will be traversed, and every node will be
+checked for IDREF and IDREFS attributes. If the tree is big, this may take some
+time.
+</para>
+ </formalpara>
+
+ <formalpara>
+ <title>What are deterministic content models?</title>
+ <para>Models of this type can speed up the validation checks;
+furthermore they ensure SGML-compatibility. In particular, a content model is
+deterministic if the parser can determine the actually used alternative by
+inspecting only the current token. For example, this element has
+non-deterministic contents:
+
+<programlisting><![CDATA[
+<!ELEMENT x ((u,v) | (u,y+) | v)>
+]]></programlisting>
+
+If the first element in <literal>x</literal> is <literal>u</literal>, the
+parser does not know which of the alternatives <literal>(u,v)</literal> or
+<literal>(u,y+)</literal> will work; the parser must also inspect the second
+element to be able to distinguish between the alternatives. Because such
+look-ahead (or "guessing") is required, this example is
+non-deterministic.</para>
+ </formalpara>
+
+ <para>The XML standard demands that content models must be
+deterministic. So it is recommended to turn the option
+<literal>accept_only_deterministic_models</literal> on; however, PXP can also
+process non-deterministic models using a backtracking algorithm.</para>
+
+ <para>Deterministic models ensure that validation can be performed in
+linear time. In order to get the maximum benefits, PXP also implements a
+special validator that profits from deterministic models; this is the
+deterministic finite automaton (DFA). This validator is enabled per element
+type if the element type has a deterministic model and if the option
+<literal>validate_by_dfa</literal> is turned on.</para>
+
+ <para>In general, I expect that the DFA method is faster than the
+backtracking method; especially in the worst case the DFA takes only linear
+time. However, if the content model has only few alternatives and the
+alternatives do not nest, the backtracking algorithm may be better.</para>
+
+ </sect2>
+
+
+ </sect1>
+
+
+ <sect1>
+ <title>Updates</title>
+
+ <para><emphasis>Some (often later added) features that are otherwise
+not explained in the manual but are worth mentioning.</emphasis></para>
+
+ <itemizedlist mark="bullet" spacing="compact">
+ <listitem><para>Methods node_position, node_path, nth_node,
+previous_node, next_node for nodes: See pxp_document.mli</para>
+ </listitem>
+ <listitem><para>Functions to determine the document order of nodes:
+compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli</para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+
+ </chapter>
+
+ </part>
+</book>
+
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2250 229 229 1575 2250 1800 2295
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 3375 225 225 1575 3375 1800 3375
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 675 3375 229 229 675 3375 900 3420
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2475 3375 229 229 2475 3375 2700 3420
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 2475 180 180 3600 2475 3780 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 2880 2475 180 180 2880 2475 3060 2475
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 4320 2475 186 186 4320 2475 4500 2520
+1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 1485 186 186 3600 1485 3780 1530
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 675 3150 1395 2385
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1575 2475 1575 3150
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1755 2385 2475 3150
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1537 2010 3412 1462
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 3412 1537 1672 2047
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 810 3195 2707 2512
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1740 3217 3442 2580
+2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 2640 3210 4177 2610
+4 0 0 80 0 14 12 0.0000 4 75 105 3555 1530 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 1530 2295 n\001
+4 0 0 80 0 12 12 0.2967 4 135 1365 1658 1950 n # extension\001
+4 0 0 80 0 12 12 0.2967 4 135 840 2475 1950 x # node\001
+4 0 0 80 0 16 12 0.0000 4 135 1140 1020 4050 The node tree\001
+4 0 0 80 0 16 12 0.0000 4 135 1245 3225 3285 The extensions\001
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 1350 242 229 6141 1350 6379 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 2250 242 229 6141 2250 6379 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 5426 2250 242 229 5426 2250 5665 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6856 2250 242 229 6856 2250 7094 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 7571 2925 242 229 7571 2925 7809 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8524 2925 242 229 8524 2925 8762 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8047 2250 242 229 8047 2250 8285 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 1350 242 229 1866 1350 2104 1395
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 2250 242 229 1866 2250 2104 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1151 2250 242 229 1151 2250 1390 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 2581 2250 242 229 2581 2250 2819 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3296 2925 242 229 3296 2925 3534 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 4249 2925 242 229 4249 2925 4487 2970
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3772 2250 242 229 3772 2250 4010 2295
+1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8325 1350 242 229 8325 1350 8563 1395
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 5910 1440 5402 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 6109 1590 6101 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 6307 1537 6697 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 7832 2347 7602 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.76 123.53
+ 8150 2452 8349 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 5490 2017 5958 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 6164 2010 6173 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 6768 2025 6355 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 7673 2715 7880 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.76 123.53
+ 8412 2707 8222 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+ 6387 1372 8023 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+ 4950 900 9000 900 9000 3375 4950 3375 4950 900
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 1635 1440 1127 2017
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 1834 1590 1826 2025
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 2032 1537 2422 2070
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 3557 2347 3327 2692
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 61.75 123.51
+ 3875 2452 4074 2752
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 1215 2017 1683 1492
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 1889 2010 1898 1575
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 2493 2025 2080 1470
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 3398 2715 3605 2415
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 61.75 123.51
+ 4137 2707 3947 2415
+2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2
+ 2112 1372 3748 2017
+2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5
+ 675 900 4725 900 4725 3375 675 3375 675 900
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8197 1545 8055 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8137 2025 8280 1590
+2 1 0 3 0 7 95 0 -1 0.000 0 0 -1 1 0 4
+ 2 1 2.00 120.00 180.00
+ 7875 1500 7620 1965 7845 1920 7485 2355
+4 0 0 95 0 14 13 0.0000 4 79 111 6094 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 7991 2265 y\001
+4 0 0 95 0 14 13 0.0000 4 79 111 1819 1379 x\001
+4 0 0 95 0 14 13 0.0000 4 111 111 3716 2265 y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 6459 1335 x # add_node y\001
+4 0 0 95 0 12 12 0.0000 4 150 1470 2214 1365 x # add_node y\001
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6345 2700 229 229 6345 2700 6570 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 5895 3600 229 229 5895 3600 6120 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6795 3600 229 229 6795 3600 7020 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+ 4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+ 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6382 2460 6382 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6307 2032 6307 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6180 2857 5880 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6427 2932 6645 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5962 3367 6232 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6705 3375 6487 2887
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2662 2032 2662 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3060 3375 2842 2887
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 75 105 6285 2752 x\001
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 80 0 12 12 0.0000 4 105 840 3690 2025 let x' =\001
+4 0 0 80 0 12 12 0.0000 4 150 1890 3690 2205 x # orphaned_clone\001
+4 0 0 80 0 14 12 0.0000 4 105 210 8235 1845 x'\001
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 2550 2092 2865 2407
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+ 2595 2362 2820 2137
+2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2
+ 2595 2137 2820 2362
+-6
+6 1980 2430 3420 3870
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2535 2857 2235 3360
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2782 2932 3000 3420
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2317 3367 2587 2910
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3060 3375 2842 2887
+-6
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745
+1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745
+1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2737 2460 2737 2032
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2662 2032 2662 2467
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2610 4725 2610
+2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2
+ 4050 2745 4725 2745
+2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3
+ 4500 2385 4950 2655 4500 2970
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2490 1905 2025 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2827 2002 3202 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2115 2475 2535 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 3255 2505 2872 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6135 1905 5670 2467
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 6472 2002 6847 2542
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 5760 2475 6180 1965
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 6900 2505 6517 1957
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8160 1957 7860 2460
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 8407 2032 8625 2520
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 7942 2467 8212 2010
+2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 8685 2475 8467 1987
+2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5
+ 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350
+4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001
+4 0 0 95 0 12 12 0.0000 4 135 1050 3960 2250 x # delete\001
+4 0 0 80 0 14 12 0.0000 4 75 105 8280 1845 x\001
+4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001
+4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 2025 229 229 2025 2025 2250 2070
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1350 2025 225 225 1350 2025 1575 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2700 2025 225 225 2700 2025 2925 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 1125 225 225 2025 1125 2250 1125
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 1380 1800 1845 1275
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 1815 1207 1282 1815
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2055 1792 2055 1350
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 1980 1350 1980 1807
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 1 1.00 60.00 120.00
+ 2190 1297 2550 1867
+2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2
+ 1 0 1.00 60.00 120.00
+ 2602 1807 2220 1237
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 450 675 3150 675 3150 2475 450 2475 450 675
+4 0 0 100 0 12 10 0.0000 4 120 540 2377 1342 parent\001
+4 0 0 100 0 12 10 0.0000 4 105 810 645 1628 sub_nodes\001
--- /dev/null
+#FIG 3.2
+Portrait
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+6 1665 2700 2835 3150
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+ 2835 3150 2835 2700 1665 2700 1665 3150 2835 3150
+4 0 0 80 0 18 12 0.0000 4 135 930 1815 3015 "Cherries"\001
+-6
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2250 1125 225 225 2250 1125 2475 1125
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2025 225 225 1575 2025 1800 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2925 2025 225 225 2925 2025 3150 2025
+1 3 0 1 0 7 100 0 15 0.000 1 0.0000 900 2925 242 242 900 2925 1125 3015
+2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5
+ 1485 4275 1485 3825 315 3825 315 4275 1485 4275
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2085 1275 1582 1807
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 2407 1297 2940 1800
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1417 2190 900 2692
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 1740 2190 2257 2700
+2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
+ 892 3180 892 3825
+2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
+ 45 675 6525 675 6525 4950 45 4950 45 675
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 22
+ 2115 3645 2250 3600 2520 3555 2745 3510 2925 3555 3150 3690
+ 3375 3735 3600 3735 3825 3735 4140 3825 4140 4005 4005 4185
+ 3735 4230 3420 4185 3150 4230 2835 4275 2520 4230 2340 4140
+ 2115 4095 1980 4005 1980 3825 2025 3735
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+3 3 0 1 0 7 100 0 -1 0.000 0 0 0 17
+ 3465 1170 3645 1080 4050 1035 4320 1035 4545 1080 4770 1170
+ 5130 1215 5355 1350 5400 1530 5265 1665 4860 1710 4455 1710
+ 4095 1665 3780 1620 3555 1575 3420 1485 3420 1305
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000
+ -1.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+ 2475 1215 2655 1350 2970 1440 3240 1395 3420 1260
+ 0.000 -1.000 -1.000 -1.000 0.000
+3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5
+ 1125 3060 1215 3397 1410 3607 1687 3727 2025 3720
+ 0.000 -1.000 -1.000 -1.000 0.000
+4 0 0 80 0 18 12 0.0000 4 180 1065 375 4125 "An orange"\001
+4 0 0 80 0 18 12 0.0000 4 90 315 750 2985 <a>\001
+4 0 0 80 0 18 12 0.0000 4 135 315 1410 2085 <b>\001
+4 0 0 80 0 18 12 0.0000 4 90 315 2790 2070 <c>\001
+4 0 0 80 0 18 12 0.0000 4 90 315 2100 1200 <a>\001
+4 0 0 100 0 16 12 0.0000 4 135 795 3600 1260 attributes:\001
+4 0 0 100 0 16 12 0.0000 4 180 1680 3600 1485 "att" -> Value "apple"\001
+4 0 0 100 0 16 12 0.0000 4 135 795 2250 3780 attributes:\001
+4 0 0 100 0 17 12 0.0000 4 180 5910 390 4725 <a att="apple"><b><a att="orange">An orange</a>Cherries</b><c/></a>\001
+4 0 0 100 0 16 12 0.0000 4 180 1800 2250 4005 "att" -> Value "orange"\001
--- /dev/null
+<!ENTITY readme.code.header '
+open Pxp_types
+open Pxp_document
+'>
+<!ENTITY readme.code.footnote-printer '
+class type footnote_printer =
+ object
+ method footnote_to_html : store_type -> out_channel -> unit
+ end
+
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : out_channel -> unit
+ end
+;;
+'>
+<!ENTITY readme.code.store '
+class store =
+ object (self)
+
+ val mutable footnotes = ( [] : (int * footnote_printer) list )
+ val mutable next_footnote_number = 1
+
+ method alloc_footnote n =
+ let number = next_footnote_number in
+ next_footnote_number <- number+1;
+ footnotes <- footnotes @ [ number, n ];
+ number
+
+ method print_footnotes ch =
+ if footnotes <> [] then begin
+ output_string ch "<hr align=left noshade=noshade width=\"30&percent;\">\n";
+ output_string ch "<dl>\n";
+ List.iter
+ (fun (_,n) ->
+ n # footnote_to_html (self : #store_type :> store_type) ch)
+ footnotes;
+ output_string ch "</dl>\n";
+ end
+
+ end
+;;
+'>
+<!ENTITY readme.code.escape-html '
+let escape_html s =
+ Str.global_substitute
+ (Str.regexp "<\\|>\\|&\\|\"")
+ (fun s ->
+ match Str.matched_string s with
+ "<" -> "&lt;"
+ | ">" -> "&gt;"
+ | "&" -> "&amp;"
+ | "\"" -> "&quot;"
+ | _ -> assert false)
+ s
+;;
+'>
+<!ENTITY readme.code.shared '
+class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ method clone = {< >}
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ method virtual to_html : store -> out_channel -> unit
+
+ end
+;;
+'>
+<!ENTITY readme.code.only-data '
+class only_data =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch (escape_html (self # node # data))
+ end
+;;
+'>
+<!ENTITY readme.code.no-markup '
+class no_markup =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes)
+ end
+;;
+'>
+<!ENTITY readme.code.readme '
+class readme =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ (* output header *)
+ output_string
+ ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+ output_string
+ ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+ let title =
+ match self # node # attribute "title" with
+ Value s -> s
+ | _ -> assert false
+ in
+ let html_header, _ =
+ try (self # node # dtd # par_entity "readme:html:header")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_trailer, _ =
+ try (self # node # dtd # par_entity "readme:html:trailer")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_bgcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:bgcolor")
+ # replacement_text
+ with WF_error _ -> "white", false in
+ let html_textcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:textcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_alinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:alinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_vlinkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:vlinkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_linkcolor, _ =
+ try (self # node # dtd # par_entity "readme:html:linkcolor")
+ # replacement_text
+ with WF_error _ -> "", false in
+ let html_background, _ =
+ try (self # node # dtd # par_entity "readme:html:background")
+ # replacement_text
+ with WF_error _ -> "", false in
+
+ output_string ch "<html><header><title>\n";
+ output_string ch (escape_html title);
+ output_string ch "</title></header>\n";
+ output_string ch "<body ";
+ List.iter
+ (fun (name,value) ->
+ if value <> "" then
+ output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+ [ "bgcolor", html_bgcolor;
+ "text", html_textcolor;
+ "link", html_linkcolor;
+ "alink", html_alinkcolor;
+ "vlink", html_vlinkcolor;
+ ];
+ output_string ch ">\n";
+ output_string ch html_header;
+ output_string ch "<h1>";
+ output_string ch (escape_html title);
+ output_string ch "</h1>\n";
+ (* process main content: *)
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ (* now process footnotes *)
+ store # print_footnotes ch;
+ (* trailer *)
+ output_string ch html_trailer;
+ output_string ch "</html>\n";
+
+ end
+;;
+'>
+<!ENTITY readme.code.section '
+class section the_tag =
+ object (self)
+ inherit shared
+
+ val tag = the_tag
+
+ method to_html store ch =
+ let sub_nodes = self # node # sub_nodes in
+ match sub_nodes with
+ title_node :: rest ->
+ output_string ch ("<" ^ tag ^ ">\n");
+ title_node # extension # to_html store ch;
+ output_string ch ("\n</" ^ tag ^ ">");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ rest
+ | _ ->
+ assert false
+ end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;
+'>
+<!ENTITY readme.code.map-tag '
+class map_tag the_target_tag =
+ object (self)
+ inherit shared
+
+ val target_tag = the_target_tag
+
+ method to_html store ch =
+ output_string ch ("<" ^ target_tag ^ ">\n");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</" ^ target_tag ^ ">");
+ end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;
+'>
+<!ENTITY readme.code.br '
+class br =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<br>\n";
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ end
+;;
+'>
+<!ENTITY readme.code.code '
+class code =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ let data = self # node # data in
+ (* convert tabs *)
+ let l = String.length data in
+ let rec preprocess i column =
+ (* this is very ineffective but comprehensive: *)
+ if i < l then
+ match data.[i] with
+ '\t' ->
+ let n = 8 - (column mod 8) in
+ String.make n ' ' ^ preprocess (i+1) (column + n)
+ | '\n' ->
+ "\n" ^ preprocess (i+1) 0
+ | c ->
+ String.make 1 c ^ preprocess (i+1) (column + 1)
+ else
+ ""
+ in
+ output_string ch "<p><pre>";
+ output_string ch (escape_html (preprocess 0 0));
+ output_string ch "</pre></p>";
+
+ end
+;;
+'>
+<!ENTITY readme.code.a '
+class a =
+ object (self)
+ inherit shared
+
+ method to_html store ch =
+ output_string ch "<a ";
+ let href =
+ match self # node # attribute "href" with
+ Value v -> escape_html v
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ begin match self # node # attribute "readmeref" with
+ Value v -> escape_html v ^ ".html"
+ | Valuelist _ -> assert false
+ | Implied_value ->
+ ""
+ end
+ in
+ if href <> "" then
+ output_string ch ("href=\"" ^ href ^ "\"");
+ output_string ch ">";
+ output_string ch (escape_html (self # node # data));
+ output_string ch "</a>";
+
+ end
+;;
+'>
+<!ENTITY readme.code.footnote '
+class footnote =
+ object (self)
+ inherit shared
+
+ val mutable footnote_number = 0
+
+ method to_html store ch =
+ let number =
+ store # alloc_footnote (self : #shared :> footnote_printer) in
+ let foot_anchor =
+ "footnote" ^ string_of_int number in
+ let text_anchor =
+ "textnote" ^ string_of_int number in
+ footnote_number <- number;
+ output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^
+ foot_anchor ^ "\">[" ^ string_of_int number ^
+ "]</a>" )
+
+ method footnote_to_html store ch =
+ (* prerequisite: we are in a definition list <dl>...</dl> *)
+ let foot_anchor =
+ "footnote" ^ string_of_int footnote_number in
+ let text_anchor =
+ "textnote" ^ string_of_int footnote_number in
+ output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^
+ text_anchor ^ "\">[" ^ string_of_int footnote_number ^
+ "]</a></dt>\n<dd>");
+ List.iter
+ (fun n -> n # extension # to_html store ch)
+ (self # node # sub_nodes);
+ output_string ch ("\n</dd>")
+
+ end
+;;
+'>
+<!ENTITY readme.code.tag-map '
+open Pxp_yacc
+
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new em));
+ "ul", (new element_impl (new ul));
+ "li", (new element_impl (new li));
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a));
+ ]
+ ()
+;;
+'>
--- /dev/null
+<!ENTITY markup-yacc.mli '
+
+open Pxp_types
+open Pxp_dtd
+open Pxp_document
+
+exception ID_not_unique
+
+class type [ 'ext ] index =
+object
+ (* The type of indexes over the ID attributes of the elements. This type
+ * is the minimum requirement needed by the parser to create such an index.
+ *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* Add the passed node to the index. If there is already an ID with
+ * the passed string value, the exception ID_not_unique should be
+ * raised. (But the index is free also to accept several identical IDs.)
+ *)
+ method find : string -> 'ext node
+ (* Finds the node with the passed ID value, or raises Not_found *)
+end
+;;
+
+
+class [ 'ext ] hash_index :
+object
+ (* This is a simple implementation of 'index' using a hash table. *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* See above. *)
+ method find : string -> 'ext node
+ (* See above. *)
+ method index : (string, 'ext node) Hashtbl.t
+ (* Returns the hash table. *)
+end
+;;
+
+
+type config =
+ { warner : collect_warnings;
+ (* An object that collects warnings. *)
+
+ errors_with_line_numbers : bool;
+ (* Whether error messages contain line numbers or not. The parser
+ * is 10 to 20 per cent faster if line numbers are turned off;
+ * you get only byte positions in this case.
+ *)
+
+ enable_pinstr_nodes : bool;
+ (* true: turns a special mode for processing instructions on. Normally,
+ * you cannot determine the exact location of a PI; you only know
+ * in which element the PI occurs. This mode makes it possible
+ * to find the exact location out: Every PI is artificially wrapped
+ * by a special node with type T_pinstr. For example, if the XML text
+ * is <a><?x?><?y?></a>, the parser normally produces only an element
+ * object for "a", and puts the PIs "x" and "y" into it (without
+ * order). In this mode, the object "a" will contain two objects
+ * with type T_pinstr, and the first object will contain "x", and the
+ * second "y": the object tree looks like
+ * - Node with type = T_element "a"
+ * - Node with type = T_pinstr "x"
+ * + contains processing instruction "x"
+ * - Node with type = T_pinstr "y"
+ * + contains processing instruction "y"
+ *
+ * Notes:
+ * (1) In past versions of PXP this mode was called
+ * processing_instructions_inline, and it produced nodes of
+ * type T_element "-pi" instead of T_pinstr.
+ * (2) The T_pinstr nodes are created from the pinstr exemplars
+ * in your spec
+ *)
+
+ enable_super_root_node : bool;
+ (* true: the topmost element of the XML tree is not the root element,
+ * but the so-called super root. The root element is a son of the
+ * super root. The super root is a node with type T_super_root.
+ * The following behaviour changes, too:
+ * - PIs occurring outside the root element and outside the DTD are
+ * added to the super root instead of the document object
+ * - If enable_pinstr_nodes is also turned on, the PI wrappers
+ * are added to the super root
+ *
+ * For example, the document
+ * <?x?><a>y</a><?y?>
+ * is normally represented by:
+ * - document object
+ * + contains PIs x and y
+ * - reference to root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * With enabled super root node:
+ * - document object
+ * - reference to super root node with type = T_super_root
+ * + contains PIs x and y
+ * - root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * If also enable_pinstr_nodes:
+ * - document object
+ * - reference to super root node with type = T_super_root
+ * - node with type = T_pinstr "x"
+ * + contains PI "x"
+ * - root node with type = T_element "a"
+ * - node with type = T_data: contains "y"
+ * - node with type = T_pinstr "y"
+ * + contains PI "y"
+ * Notes:
+ * (1) In previous versions of PXP this mode was called
+ * virtual_root, and it produced an additional node of type
+ * T_element "-vr" instead of T_super_root.
+ * (2) The T_super_root node is created from the super root exemplar
+ * in your spec.
+ *)
+
+ enable_comment_nodes : bool;
+ (* When enabled, comments are represented as nodes with type =
+ * T_comment.
+ * To access the contents of comments, use the method "comment"
+ * for the comment nodes.
+ * These nodes behave like elements; however, they are normally
+ * empty and do not have attributes. Note that it is possible to
+ * add children to comment nodes and to set attributes, but it is
+ * strongly recommended not to do so. There are no checks on
+ * such abnormal use, because they would cost too
+ * much time, even when no comment nodes are generated at all.
+ *
+ * Comment nodes should be disabled unless you must parse a
+ * third-party XML text which uses comments as another data
+ * container.
+ *
+ * The nodes of type T_comment are created from the comment exemplars
+ * in your spec.
+ *)
+
+ encoding : rep_encoding;
+ (* Specifies the encoding used for the *internal* representation
+ * of any character data.
+ * Note that the default is still Enc_iso88591.
+ *)
+
+ recognize_standalone_declaration : bool;
+ (* Whether the "standalone" declaration is recognized or not.
+ * This option does not have an effect on well-formedness parsing:
+ * in this case such declarations are never recognized.
+ *
+ * Recognizing the "standalone" declaration means that the
+ * value of the declaration is scanned and passed to the DTD,
+ * and that the "standalone-check" is performed.
+ *
+ * Standalone-check: If a document is flagged standalone='yes'
+ * some additional constraints apply. The idea is that a parser
+ * without access to any external document subsets can still parse
+ * the document, and will still return the same values as the parser
+ * with such access. For example, if the DTD is external and if
+ * there are attributes with default values, it is checked that there
+ * is no element instance where these attributes are omitted - the
+ * parser would return the default value but this requires access to
+ * the external DTD subset.
+ *)
+
+ store_element_positions : bool;
+ (* Whether the file name, the line and the column of the
+ * beginning of elements are stored in the element nodes.
+ * This option may be useful to generate error messages.
+ *
+ * Positions are only stored for:
+ * - Elements
+ * - Wrapped processing instructions (see enable_pinstr_nodes)
+ * For all other node types, no position is stored.
+ *
+ * You can access positions by the method "position" of nodes.
+ *)
+
+ idref_pass : bool;
+ (* Whether the parser does a second pass and checks that all
+ * IDREF and IDREFS attributes contain valid references.
+ * This option works only if an ID index is available. To create
+ * an ID index, pass an index object as id_index argument to the
+ * parsing functions (such as parse_document_entity; see below).
+ *
+ * "Second pass" does not mean that the XML text is again parsed;
+ * only the existing document tree is traversed, and the check
+ * on bad IDREF/IDREFS attributes is performed for every node.
+ *)
+
+ validate_by_dfa : bool;
+ (* If true, and if DFAs are available for validation, the DFAs will
+ * actually be used for validation.
+ * If false, or if no DFAs are available, the standard backtracking
+ * algorithm will be used.
+ * DFA = deterministic finite automaton.
+ *
+ * DFAs are only available if accept_only_deterministic_models is
+ * "true" (because in this case, it is relatively cheap to construct
+ * the DFAs). DFAs are a data structure which ensures that validation
+ * can always be performed in linear time.
+ *
+ * I strongly recommend using DFAs; however, there are examples
+ * for which validation by backtracking is faster.
+ *)
+
+ accept_only_deterministic_models : bool;
+ (* Whether only deterministic content models are accepted in DTDs. *)
+
+ (* The following options are not implemented, or only for internal
+ * use.
+ *)
+
+ debugging_mode : bool;
+ }
+
+
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+val from_channel :
+ ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
+ in_channel -> source
+
+val from_string :
+ ?fixenc:encoding -> string -> source
+
+val from_file :
+ ?system_encoding:encoding -> string -> source
+
+(* Notes on sources (version 2):
+ *
+ * Sources specify where the XML text to parse comes from. Sources not only
+ * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
+ * names), and they are interpreted as a specific encoding of characters.
+ * A source should be associated with an external ID, because otherwise
+ * it is not known how to handle relative names.
+ *
+ * There are two primary sources, Entity and ExtID, and several functions
+ * for derived sources. First explanations for the functions:
+ *
+ * from_channel: The XML text is read from an in_channel. By default, the
+ * channel is not associated with an external ID, and it is impossible
+ * to resolve relative SYSTEM IDs found in the document.
+ * If the ?id argument is passed, it is assumed that the channel has this
+ * external ID. If relative SYSTEM IDs occur in the document, they can
+ * be interpreted; however, it is only possible to read from "file:"
+ * IDs.
+ * By default, the channel automatically detects the encoding. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_string: The XML text is read from a string.
+ * It is impossible to read from any external entity whose reference is found
+ * in the string.
+ * By default, the encoding of the string is detected automatically. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_file: The XML text is read from the file whose file name is
+ * passed to the function (as UTF-8 string).
+ * Relative system IDs can be interpreted by this function.
+ * The ?system_encoding argument specifies the character encoding used
+ * for file names (sic!). By default, UTF-8 is assumed.
+ *
+ * Examples:
+ *
+ * from_file "/tmp/file.xml":
+ * reads from this file, which is assumed to have the ID
+ * SYSTEM "file://localhost/tmp/file.xml".
+ *
+ * let ch = open_in "/tmp/file.xml" in
+ * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
+ * This does the same, but uses a channel.
+ *
+ * from_channel ~id:(System "http://host/file.xml")
+ * ch
+ * reads from the channel ch, and it is assumed that the ID is
+ * SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
+ * it will be interpreted relative to this location; however, there is
+ * no way to read via HTTP.
+ * If there is any "file:" SYSTEM ID, it is possible to read the file.
+ *
+ * The primary sources:
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ * entity to read from is passed to the resolver, and the resolver finds
+ * the entity and opens it.
+ * The intention of this option is to allow customized
+ * resolvers to interpret external identifiers without any restriction.
+ * The Pxp_reader module contains several classes allowing the user to
+ * compose such a customized resolver from predefined components.
+ *
+ * ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement every behaviour by using a customized
+ * entity class. Once the DTD object d is known that will be used during
+ * parsing, the entity e = m d is determined and used together with the
+ * resolver r.
+ * This is only for hackers.
+ *)
+
+
+
+val default_config : config
+ (* - Warnings are thrown away
+ * - Error messages will contain line numbers
+ * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
+ * - The internal encoding is ISO-8859-1
+ * - The standalone declaration is checked
+ * - Element positions are stored
+ * - The IDREF pass is left out
+ * - If available, DFAs are used for validation
+ * - Only deterministic content models are accepted
+ *)
+
+val default_extension : ('a node extension) as 'a
+ (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_spec : ('a node extension as 'a) spec
+ (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+ (* Parse an entity containing a DTD (external subset), and return this DTD. *)
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+ (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and returns the DTD contained in the document.
+ * The parts of the document outside the DTD are actually not parsed,
+ * i.e. parsing stops when all declarations of the DTD have been read.
+ *)
+
+val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and validate the contents of the document against the DTD contained
+ * and/or referenced in the document.
+ *
+ * If the optional argument ~transform_dtd is passed, the following
+ * modification applies: After the DTD (both the internal and external
+ * subsets) has been parsed, the function ~transform_dtd is called,
+ * and the resulting DTD is actually used to validate the document.
+ *
+ * If the optional argument ~transform_dtd is missing, the parser
+ * behaves in the same way as if the identity were passed as ~transform_dtd.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfdocument_entity :
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document (see parse_document_entity), but do not
+ * validate it. Only checks on well-formedness are performed.
+ *)
+
+val parse_content_entity :
+ ?id_index:('ext index) ->
+ config -> source -> dtd -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document. The
+ * fragment must be a single element (i.e. something like <a>...</a>;
+ * not a sequence like <a>...</a><b>...</b>). The element is validated
+ * against the passed DTD, but it is not checked whether the element is
+ * the root element specified in the DTD.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfcontent_entity :
+ config -> source -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document
+ * (see parse_content_entity). The fragment is not validated, only
+ * checked for well-formedness.
+ *)
+
+
+'>
--- /dev/null
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C xmlforms CLEAN
+ $(MAKE) -C validate CLEAN
+ $(MAKE) -C readme CLEAN
+ $(MAKE) -C simple_transformation CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ $(MAKE) -C xmlforms distclean
+ $(MAKE) -C validate distclean
+ $(MAKE) -C readme distclean
+ $(MAKE) -C simple_transformation distclean
+
+
--- /dev/null
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+depend
+depend.pkg
+
--- /dev/null
+# make readme: make bytecode executable
+# make readme.opt: make native executable
+# make clean: remove intermediate files
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files
+# make install
+#----------------------------------------------------------------------
+
+BIN = /usr/local/bin
+
+.PHONY: readme
+readme:
+ $(MAKE) -f Makefile.code readme
+
+.PHONY: readme.opt
+readme.opt:
+ $(MAKE) -f Makefile.code readme.opt
+
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ rm -f readme readme.opt
+
+.PHONY: install
+install:
+ cp readme $(BIN)
--- /dev/null
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = to_html.cmo to_text.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = readme.cma
+XARCHIVE = readme.cmxa
+NAME = readme
+REQUIRES = str pxp
+
+readme: $(ARCHIVE) main.cmo
+ ocamlfind ocamlc -o readme -custom -package "$(REQUIRES)" \
+ -linkpkg $(ARCHIVE) main.cmo
+
+readme.opt: $(XARCHIVE) main.cmx
+ ocamlfind ocamlopt -o readme.opt -custom -package "$(REQUIRES)" \
+ -linkpkg $(XARCHIVE) main.cmx
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+
+(* Report the exception [e] on stderr, one line, formatted by the error
+ * formatter [string_of_exn].  The function is not recursive, so it is
+ * bound with a plain [let].
+ *)
+let print_error e =
+  prerr_endline (string_of_exn e)
+;;
+
+
+(* [run f a] applies [f] to [a].  Any exception escaping [f] is reported
+ * through [print_error]; the program then terminates with exit status 1,
+ * so that callers such as make or a shell script can tell that the
+ * conversion failed (output already written to stdout may be incomplete).
+ *)
+let run f a =
+  try f a with
+    e ->
+      print_error e;
+      exit 1
+;;
+
+
+(* Parse the document stored in the file [filename] with
+ * parse_document_entity, using the node extensions of To_html.tag_map and
+ * the default configuration with the internal encoding set explicitly to
+ * ISO-8859-1.  The root element then renders itself as HTML to stdout.
+ *)
+let convert_to_html filename =
+  let config = { default_config with encoding = `Enc_iso88591 } in
+  let source = from_file filename in
+  let document = parse_document_entity config source To_html.tag_map in
+  let root = document # root in
+  (* The store is handed down to every to_html call of the tree. *)
+  let store = new To_html.store in
+  root # extension # to_html store stdout
+;;
+
+
+(* Parse the document stored in the file [filename] with
+ * parse_document_entity, using the node extensions of To_text.tag_map and
+ * the unmodified default configuration.  The root element fills a
+ * To_text.box, which is finally written to stdout.
+ *)
+let convert_to_text filename =
+  let source = from_file filename in
+  let document = parse_document_entity default_config source To_text.tag_map in
+  let root = document # root in
+  let store = new To_text.store in
+  (* NOTE(review): the two 79 arguments are presumably size limits of the
+   * box in columns -- confirm against To_text.box. *)
+  let page = new To_text.box 79 79 in
+  root # extension # to_box store page;
+  page # output 0 0 stdout
+;;
+
+
+(* Command line entry point.
+ *
+ * Recognized options are -html and -text; exactly one of them must be
+ * given, together with exactly one anonymous argument naming the input
+ * file.  A second anonymous argument is rejected with Arg.Bad.
+ *
+ * Both usage errors detected here (no input file, not exactly one output
+ * format) print a message on stderr and terminate with exit status 1.
+ *)
+let main() =
+  let want_html = ref false in
+  let want_text = ref false in
+  let filename = ref None in
+  Arg.parse
+    [ "-html", Arg.Set want_html,
+        " convert file to html";
+      "-text", Arg.Set want_text,
+        " convert file to text";
+    ]
+    (fun s ->
+       match !filename with
+         None -> filename := Some s
+       | Some _ ->
+           raise (Arg.Bad "Multiple arguments not allowed."))
+    "usage: readme [ -text | -html ] input.xml >output";
+  let fn =
+    match !filename with
+      None ->
+        prerr_endline "readme: no input";
+        exit 1
+    | Some s -> s
+  in
+  match !want_html, !want_text with
+    true, false ->
+      run convert_to_html fn
+  | false, true ->
+      run convert_to_text fn
+  | true, true
+  | false, false ->
+      (* Neither or both formats requested: this is a usage error, so it
+       * must not look like a successful run to the caller. *)
+      prerr_endline ("readme: Please select exactly one output format");
+      exit 1
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.4 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.3 2000/05/01 16:46:40 gerd
+ * Using the new error formatter.
+ *
+ * Revision 1.2 1999/08/23 16:54:19 gerd
+ * Minor changes.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *)
--- /dev/null
+<!-- $Id -->
+
+<!ENTITY % p.like "p|ul">
+<!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+ title CDATA #REQUIRED>
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+
+<!ELEMENT title (#PCDATA|br)*>
+
+<!ELEMENT p (#PCDATA|%text;)*>
+
+<!ELEMENT br EMPTY>
+
+<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+
+<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+
+<!ELEMENT footnote (#PCDATA|%text;)*>
+
+<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a
+ href CDATA #IMPLIED
+ readmeref CDATA #IMPLIED
+>
+
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+(*$ readme.code.header *)
+open Pxp_types
+open Pxp_document
+(*$-*)
+
+
+(*$ readme.code.footnote-printer *)
+(* Two mutually recursive object types that connect footnotes with the
+ * store collecting them.
+ *
+ * footnote_printer: an object that can print itself as a footnote; it is
+ * given the store and the output channel.
+ *)
+class type footnote_printer =
+ object
+ method footnote_to_html : store_type -> out_channel -> unit
+ end
+
+(* store_type: the view of the store that is passed to the footnote
+ * printers.
+ *)
+and store_type =
+ object
+ (* Registers a footnote printer and returns an int; the class store in
+  * this file returns the number assigned to the footnote. *)
+ method alloc_footnote : footnote_printer -> int
+ (* Writes the registered footnotes to the channel. *)
+ method print_footnotes : out_channel -> unit
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.store *)
+(* Collects the footnotes met while the main text is printed, so that
+ * they can be emitted as one block afterwards.
+ *
+ * alloc_footnote assigns consecutive numbers starting at 1 and remembers
+ * the printer; print_footnotes writes a rule and a definition list with
+ * one entry per remembered printer, in allocation order, or nothing at
+ * all if no footnote was allocated.
+ *)
+class store =
+  object (self)
+
+    val mutable next_footnote_number = 1
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+
+    method alloc_footnote printer =
+      let allocated = next_footnote_number in
+      footnotes <- footnotes @ [ (allocated, printer) ];
+      next_footnote_number <- allocated + 1;
+      allocated
+
+    method print_footnotes ch =
+      match footnotes with
+        [] -> ()
+      | registered ->
+          (* The printers only get the restricted store_type view. *)
+          let this_store = (self : #store_type :> store_type) in
+          let print_one (_, printer) =
+            printer # footnote_to_html this_store ch in
+          output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
+          output_string ch "<dl>\n";
+          List.iter print_one registered;
+          output_string ch "</dl>\n"
+
+  end
+;;
+(*$-*)
+
+
+
+(*$ readme.code.escape-html *)
+(* escape_html s:
+ * returns a copy of 's' in which the characters that are special in HTML
+ * (less-than, greater-than, ampersand and the double quote) are replaced
+ * by the corresponding character entities. The result can be written into
+ * element content or into a double-quoted attribute value.
+ *)
+let escape_html s =
+  Str.global_substitute
+    (Str.regexp "<\\|>\\|&\\|\"")
+    (fun s ->
+      (* the regexp matches exactly one of the four characters below *)
+      match Str.matched_string s with
+        "<" -> "&lt;"
+      | ">" -> "&gt;"
+      | "&" -> "&amp;"
+      | "\"" -> "&quot;"
+      | _ -> assert false)
+    s
+;;
+(*$-*)
+
+
+(*$ readme.code.shared *)
+(* Common base class of all extension objects of the HTML converter. It
+ * stores the node the extension belongs to and declares the to_html
+ * method that every concrete extension class defines.
+ *)
+class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ (* clone: returns a copy of this object *)
+ method clone = {< >}
+ (* node: returns the node passed to set_node; it is an assertion failure
+ * to call this method before set_node
+ *)
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ (* to_html store ch: writes the HTML for this element to 'ch' *)
+ method virtual to_html : store -> out_channel -> unit
+
+ end
+;;
+(*$-*)
+
+
+(*$ readme.code.only-data *)
+(* Extension for data nodes: the character data is written with the HTML
+ * special characters escaped.
+ *)
+class only_data =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let text = self # node # data in
+      output_string ch (escape_html text)
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.no-markup *)
+(* Extension for elements that produce no markup of their own: only the
+ * sub nodes are converted, in document order.
+ *)
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let render child = child # extension # to_html store ch in
+      List.iter render (self # node # sub_nodes)
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.readme *)
+(* Extension for the root element 'readme': writes the whole HTML page,
+ * i.e. the document head, the body with the title as top-level heading,
+ * the converted sub nodes, and the footnotes collected in 'store'.
+ *
+ * The page can be customized by parameter entities declared in the DTD
+ * (readme:html:header, readme:html:trailer, readme:html:bgcolor,
+ * readme:html:background, readme:html:textcolor, readme:html:linkcolor,
+ * readme:html:alinkcolor, readme:html:vlinkcolor). Entities that are not
+ * declared fall back to a default.
+ *)
+class readme =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* entity_text name default:
+       * returns the replacement text of the parameter entity 'name', or
+       * 'default' if the lookup raises WF_error.
+       *)
+      let entity_text name default =
+        try
+          fst ((self # node # dtd # par_entity name) # replacement_text)
+        with WF_error _ -> default
+      in
+      (* output header *)
+      output_string
+        ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+      output_string
+        ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+      let title =
+        match self # node # attribute "title" with
+          Value s -> s
+        | _ -> assert false
+      in
+      let html_header     = entity_text "readme:html:header" "" in
+      let html_trailer    = entity_text "readme:html:trailer" "" in
+      let html_bgcolor    = entity_text "readme:html:bgcolor" "white" in
+      let html_textcolor  = entity_text "readme:html:textcolor" "" in
+      let html_alinkcolor = entity_text "readme:html:alinkcolor" "" in
+      let html_vlinkcolor = entity_text "readme:html:vlinkcolor" "" in
+      let html_linkcolor  = entity_text "readme:html:linkcolor" "" in
+      let html_background = entity_text "readme:html:background" "" in
+
+      (* The element containing the title is HEAD, not HEADER *)
+      output_string ch "<html><head><title>\n";
+      output_string ch (escape_html title);
+      output_string ch "</title></head>\n";
+      output_string ch "<body ";
+      (* only attributes with a non-empty value are written *)
+      List.iter
+        (fun (name,value) ->
+          if value <> "" then
+            output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+        [ "bgcolor", html_bgcolor;
+          "background", html_background;
+          "text", html_textcolor;
+          "link", html_linkcolor;
+          "alink", html_alinkcolor;
+          "vlink", html_vlinkcolor;
+        ];
+      output_string ch ">\n";
+      (* header and trailer are written as they are, i.e. they may
+       * contain HTML markup
+       *)
+      output_string ch html_header;
+      output_string ch "<h1>";
+      output_string ch (escape_html title);
+      output_string ch "</h1>\n";
+      (* process main content: *)
+      List.iter
+        (fun n -> n # extension # to_html store ch)
+        (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes ch;
+      (* trailer *)
+      output_string ch html_trailer;
+      output_string ch "</html>\n"
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.section *)
+(* Extension for the section elements. The first sub node is the title,
+ * which is enclosed in the HTML element 'the_tag'; the remaining sub
+ * nodes are converted after the closing tag.
+ *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_html store ch =
+      match self # node # sub_nodes with
+        [] ->
+          assert false
+      | heading :: body ->
+          let render n = n # extension # to_html store ch in
+          output_string ch ("<" ^ tag ^ ">\n");
+          render heading;
+          output_string ch ("\n</" ^ tag ^ ">");
+          List.iter render body
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;
+(*$-*)
+
+
+(*$ readme.code.map-tag *)
+(* Extension for elements that are mapped one-to-one to an HTML element:
+ * the converted sub nodes are enclosed in 'the_target_tag'.
+ *)
+class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      let opening = "<" ^ target_tag ^ ">\n" in
+      let closing = "\n</" ^ target_tag ^ ">" in
+      output_string ch opening;
+      List.iter
+        (fun child -> child # extension # to_html store ch)
+        (self # node # sub_nodes);
+      output_string ch closing
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;
+(*$-*)
+
+
+(*$ readme.code.br *)
+(* Extension for 'br': writes a line break, followed by the converted sub
+ * nodes (if there are any).
+ *)
+class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let render child = child # extension # to_html store ch in
+      output_string ch "<br>\n";
+      List.iter render (self # node # sub_nodes)
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.code *)
+(* Extension for 'code': the character data is written as preformatted
+ * text. Tabs are expanded to spaces first, assuming tab stops at every
+ * eighth column.
+ *)
+class code =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let data = self # node # data in
+      (* convert tabs; the expanded text is collected in a buffer, so the
+       * work is linear in the length of the data and needs no recursion
+       *)
+      let expanded = Buffer.create (String.length data) in
+      (* 'column' is the number of characters already in the current line *)
+      let column = ref 0 in
+      String.iter
+        (fun c ->
+          match c with
+            '\t' ->
+              (* fill up to the next tab stop *)
+              let n = 8 - (!column mod 8) in
+              Buffer.add_string expanded (String.make n ' ');
+              column := !column + n
+          | '\n' ->
+              Buffer.add_char expanded '\n';
+              column := 0
+          | _ ->
+              Buffer.add_char expanded c;
+              incr column)
+        data;
+      output_string ch "<p><pre>";
+      output_string ch (escape_html (Buffer.contents expanded));
+      output_string ch "</pre></p>"
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.a *)
+(* Extension for 'a': writes an HTML anchor. The link target is taken from
+ * the attribute 'href'; if that is missing, from 'readmeref' with the
+ * suffix .html appended. Without target no href attribute is written.
+ *)
+class a =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* the escaped value of the attribute, or None if it is implied *)
+      let escaped_attribute name =
+        match self # node # attribute name with
+          Value v -> Some (escape_html v)
+        | Valuelist _ -> assert false
+        | Implied_value -> None
+      in
+      output_string ch "<a ";
+      let href =
+        match escaped_attribute "href" with
+          Some target -> target
+        | None ->
+            begin match escaped_attribute "readmeref" with
+              Some file -> file ^ ".html"
+            | None -> ""
+            end
+      in
+      if href <> "" then
+        output_string ch ("href=\"" ^ href ^ "\"");
+      output_string ch ">";
+      output_string ch (escape_html (self # node # data));
+      output_string ch "</a>"
+
+  end
+;;
+(*$-*)
+
+
+(*$ readme.code.footnote *)
+(* Extension for 'footnote'. In the running text only a numbered link is
+ * written (to_html); the footnote text itself is written later, when the
+ * store prints the footnote list (footnote_to_html). Both places link to
+ * each other.
+ *)
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_html store ch =
+      let number =
+        store # alloc_footnote (self : #shared :> footnote_printer) in
+      let label = string_of_int number in
+      let anchor prefix = prefix ^ label in
+      footnote_number <- number;
+      output_string
+        ch
+        ( "<a name=\"" ^ anchor "textnote" ^ "\" href=\"#" ^
+          anchor "footnote" ^ "\">[" ^ label ^ "]</a>" )
+
+    method footnote_to_html store ch =
+      (* prerequisite: we are in a definition list <dl>...</dl> *)
+      let label = string_of_int footnote_number in
+      let anchor prefix = prefix ^ label in
+      let render child = child # extension # to_html store ch in
+      output_string
+        ch
+        ( "<dt><a name=\"" ^ anchor "footnote" ^ "\" href=\"#" ^
+          anchor "textnote" ^ "\">[" ^ label ^ "]</a></dt>\n<dd>" );
+      List.iter render (self # node # sub_nodes);
+      output_string ch ("\n</dd>")
+
+  end
+;;
+(*$-*)
+
+
+(**********************************************************************)
+
+(*$ readme.code.tag-map *)
+open Pxp_yacc
+
+(* tag_map: the specification that tells the parser which extension object
+ * is attached to which node. Data nodes get only_data; the elements listed
+ * in element_alist get the extension named there; all other elements get
+ * no_markup.
+ *)
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new em));
+ "ul", (new element_impl (new ul));
+ "li", (new element_impl (new li));
+ (* footnote has the additional method footnote_to_html, so it must be
+ * coerced to the common type
+ *)
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a));
+ ]
+ ()
+;;
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/22 14:34:25 gerd
+ * Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.5 2000/08/18 21:15:14 gerd
+ * Update because of PXP API change: par_entity raises WF_error
+ * instead of Validation error if the entity is not defined.
+ * Further minor updates.
+ *
+ * Revision 1.4 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.3 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 1999/09/12 20:09:32 gerd
+ * Added section marks.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+
+
+(**********************************************************************)
+(* The box class represents formatted text *)
+(**********************************************************************)
+
+(* formatted_text is the view of a box that is needed to nest it into
+ * another box: it can be printed, and it reports the properties required
+ * to compute the width of the line it is embedded in.
+ *)
+class type formatted_text =
+ object
+ method output : int -> int -> out_channel -> unit
+ (* output initial_indent indent ch:
+ * 'initial_indent' is how far the first line should be indented;
+ * 'indent' how far the rest. 'ch' is the channel on which the lines
+ * are to be printed.
+ *)
+
+ method multiline : bool
+ (* whether the box occupies multiple lines *)
+
+ method width_of_last_line : int
+ (* returns the width of the last line *)
+ end
+;;
+
+
+(* The pieces a line consists of: either a plain string or a nested box *)
+type text =
+ Text of string
+ | Box of formatted_text
+;;
+
+
+(* textwidth tl:
+ * returns the width of the line 'tl', which is given in reverse order
+ * (the last piece of the line comes first). A nested box that occupies
+ * several lines restarts the count: only the width of its last line and
+ * of the pieces following it are counted.
+ *)
+let textwidth tl =
+  let widen acc piece =
+    match piece with
+      Text s ->
+        acc + String.length s
+    | Box b ->
+        if b # multiline then
+          b # width_of_last_line
+        else
+          acc + b # width_of_last_line
+  in
+  (* the pieces are visited from the beginning of the line to its end *)
+  List.fold_left widen 0 (List.rev tl)
+;;
+
+
+(* A box collects words, spaces, line breaks and nested boxes, and breaks
+ * the words into lines such that the available width is not exceeded
+ * (if possible). The finished box is printed by the method 'output'.
+ *)
+class box the_initial_width the_width =
+  object (self)
+
+    (* The 'initial_width' is the width that is available on the first
+     * line of output; the 'width' is the width that is available in the
+     * rest.
+     *)
+
+    val initial_width = the_initial_width
+    val width = the_width
+
+    (* state: *)
+
+    val mutable space_added = false
+    val mutable linefeed_added = false
+    val mutable is_first_line = true
+    val mutable lines = []
+      (* lines in reverse order (first line = last element) *)
+    val mutable current_line = []
+      (* not member of 'lines'; again reverse order *)
+    val mutable current_indent = 0
+
+    (* add_space: appends a space unless the flag 'space_added' is set *)
+    method add_space =
+      if not space_added then begin
+        space_added <- true;
+        linefeed_added <- true;
+        current_line <- Text " " :: current_line
+      end
+
+    (* ignore_space: the next add_space and add_linefeed have no effect *)
+    method ignore_space =
+      space_added <- true;
+      linefeed_added <- true
+
+    (* add_linefeed: a linefeed of the input counts as a space; it is
+     * appended unless the flags say that white space was already handled
+     *)
+    method add_linefeed =
+      if not linefeed_added then begin
+        linefeed_added <- true;
+        if not space_added then
+          current_line <- Text " " :: current_line
+      end
+
+    method ignore_linefeed =
+      linefeed_added <- true
+
+    (* add_newline: finishes the current line and begins a new one *)
+    method add_newline =
+      lines <- current_line :: lines;
+      current_line <- [];
+      space_added <- true;
+      linefeed_added <- true;
+      is_first_line <- false;
+      current_indent <- 0
+
+    (* add_word s: appends 's' to the current line if it fits; otherwise
+     * the line is broken and 's' becomes the first word of the next line.
+     *)
+    method add_word s =
+      (* first try to add 's' to 'current_line' *)
+      let current_line' = Text s :: current_line in
+      let current_width =
+        if is_first_line then initial_width else width in
+      let fits =
+        textwidth current_line' + current_indent <= current_width in
+      (* If the current line is still empty, breaking it cannot help; it
+       * would only produce a superfluous empty line before the word.
+       *)
+      let line_is_empty =
+        match current_line with
+          [] -> true
+        | _ -> false
+      in
+      if fits || line_is_empty then begin
+        (* ok, the line does not become too long, or the word is too long
+         * for any line
+         *)
+        current_line <- current_line';
+        space_added <- false;
+        linefeed_added <- false
+      end
+      else begin
+        (* The line would be too long. *)
+        lines <- current_line :: lines;
+        current_line <- [Text s];
+        space_added <- false;
+        linefeed_added <- false;
+        is_first_line <- false;
+        current_indent <- 0
+      end
+
+    (* add_box b: appends the nested box 'b' without any width check *)
+    method add_box b =
+      current_line <- Box b :: current_line;
+      space_added <- false;
+      linefeed_added <- false
+
+
+    method width_of_last_line =
+      textwidth current_line + current_indent
+
+
+    (* available_width: the space that is left on the current line *)
+    method available_width =
+      let current_width =
+        if is_first_line then initial_width else width in
+      current_width - textwidth current_line - current_indent
+
+
+    (* multiline: whether there is a finished line, or a nested box of the
+     * current line is itself multiline
+     *)
+    method multiline =
+      lines <> [] ||
+      (List.exists
+         (function
+             Text _ -> false
+           | Box b -> b # multiline)
+         current_line)
+
+    (* output initial_indent indent ch: prints all lines to 'ch'. The first
+     * line is indented by 'initial_indent', the others by 'indent'. After
+     * the last line no newline is printed.
+     *)
+    method output initial_indent indent ch =
+      let eff_lines =
+        List.rev
+          (current_line :: lines) in
+      let rec out_lines cur_indent ll =
+        match ll with
+          [] -> ()
+        | l :: ll' ->
+            output_string ch (String.make cur_indent ' ');
+            List.iter
+              (function
+                  Text s ->
+                    output_string ch s
+                | Box b ->
+                    (* the nested box begins where the line currently
+                     * ends, so its first line needs no indentation
+                     *)
+                    b # output 0 indent ch
+              )
+              (List.rev l);
+            if ll' <> [] then
+              output_string ch "\n";
+            out_lines indent ll'
+      in
+      out_lines initial_indent eff_lines
+  end
+;;
+
+
+(* A box for list items: the first line begins with 'listmark', and the
+ * text of the item is indented by 'indent' columns. If the mark does not
+ * fit into the indentation, the text begins on the next line.
+ *)
+class listitem_box listmark indent totalwidth =
+  let padding = indent - String.length listmark in
+  let initial_newline = padding <= 0 in
+  object (self)
+    inherit box totalwidth (totalwidth - indent) as super
+
+    val extra_indent = indent
+
+    initializer
+      self # add_word listmark;
+      if initial_newline then
+        self # add_newline
+      else begin
+        (* fill the rest of the indentation with spaces *)
+        let filler = String.make padding ' ' in
+        current_line <- Text filler :: current_line;
+        space_added <- true;
+        linefeed_added <- true
+      end
+
+
+    method output initial_indent indent ch =
+      let hanging_indent = indent + extra_indent in
+      super # output initial_indent hanging_indent ch
+  end
+;;
+
+
+(**********************************************************************)
+(* Footnotes etc. *)
+(**********************************************************************)
+
+
+(* Interface of objects that can add themselves as one entry of the
+ * footnote list to a box.
+ *)
+class type footnote_printer =
+ object
+ method footnote_to_box : store_type -> box -> unit
+ end
+
+(* Interface of the footnote store: alloc_footnote registers a printer and
+ * returns an int for it; print_footnotes adds the footnotes to a box.
+ *)
+and store_type =
+ object
+ method alloc_footnote : footnote_printer -> int
+ method print_footnotes : box -> unit
+ end
+;;
+
+
+(* The store remembers all footnotes that occur in the document. Footnotes
+ * are numbered from 1 in the order of their registration and are added in
+ * that order to the box, separated from the text by a rule.
+ *)
+class store =
+  object (self)
+
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+    val mutable next_footnote_number = 1
+
+    (* registers the printer 'n' and returns the number assigned to it *)
+    method alloc_footnote n =
+      let assigned = next_footnote_number in
+      footnotes <- footnotes @ [ assigned, n ];
+      next_footnote_number <- assigned + 1;
+      assigned
+
+    (* adds the rule and all footnotes to 'b'; does nothing if no footnote
+     * has been registered
+     *)
+    method print_footnotes (b : box) =
+      match footnotes with
+        [] -> ()
+      | pending ->
+          let me = (self : #store_type :> store_type) in
+          b # add_newline;
+          b # add_newline;
+          (* the rule takes a third of the available width *)
+          let rule_length = (b # available_width) / 3 in
+          b # add_word (String.make rule_length '-');
+          b # add_newline;
+          b # add_newline;
+          List.iter
+            (fun (_, printer) -> printer # footnote_to_box me b)
+            pending;
+          b # add_newline
+  end
+;;
+
+
+
+(**********************************************************************)
+(* The extension objects *)
+(**********************************************************************)
+
+
+(* Common base class of all extension objects of the text converter. It
+ * stores the node the extension belongs to and declares the to_box method
+ * that every concrete extension class defines.
+ *)
+class virtual shared =
+ object (self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ (* clone: returns a copy of this object *)
+ method clone = {< >}
+ (* node: returns the node passed to set_node; it is an assertion failure
+ * to call this method before set_node
+ *)
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- virtual --- *)
+
+ method virtual to_box : store -> box -> unit
+ (* to_box store b:
+ * formats the element using box 'b'
+ *)
+ end
+;;
+
+
+(* Extension for data nodes: the character data is split at white space,
+ * and the words, spaces and linefeeds are added to the box one by one.
+ *)
+class only_data =
+  object (self)
+    inherit shared
+
+    val white_space_re = Str.regexp "[ \t]+\\|\n"
+
+    method to_box store b =
+      let feed piece =
+        match piece with
+          Str.Delim "\n" -> b # add_linefeed
+        | Str.Delim _ -> b # add_space
+        | Str.Text word -> b # add_word word
+      in
+      let pieces = Str.full_split white_space_re (self # node # data) in
+      List.iter feed pieces
+  end
+;;
+
+
+(* Extension for elements without formatting of their own: only the sub
+ * nodes are added to the box, in document order.
+ *)
+class no_markup =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      let render child = child # extension # to_box store b in
+      List.iter render (self # node # sub_nodes)
+  end
+;;
+
+
+(* Extension for the root element 'readme': the title is framed by two
+ * lines of asterisks; then the sub nodes follow, and finally the
+ * footnotes collected in 'store'.
+ *)
+class readme =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      let title =
+        match self # node # attribute "title" with
+          Value s -> s
+        | _ -> assert false
+      in
+      (* the frame is one column shorter than the available width *)
+      let frame = String.make (b # available_width - 1) '*' in
+      let put_line s =
+        begin
+          b # add_word s;
+          b # add_newline
+        end
+      in
+      put_line frame;
+      put_line title;
+      put_line frame;
+      b # add_newline;
+      (* process main content: *)
+      List.iter
+        (fun child -> child # extension # to_box store b)
+        (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes b;
+      (* trailer *)
+      b # add_newline
+  end
+;;
+
+
+(* Extension for the section elements. The first sub node is the title,
+ * which is framed by two lines consisting of the character 'the_tag';
+ * the remaining sub nodes follow.
+ *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    val tag = the_tag
+
+    method to_box store b =
+      match self # node # sub_nodes with
+        [] ->
+          assert false
+      | heading :: body ->
+          b # add_newline;
+          (* the frame is one column shorter than the available width *)
+          let frame = String.make (b # available_width - 1) tag in
+          let put_line s =
+            begin
+              b # add_word s;
+              b # add_newline
+            end
+          in
+          put_line frame;
+          put_line (heading # data);
+          put_line frame;
+          List.iter
+            (fun child -> child # extension # to_box store b)
+            body
+  end
+;;
+
+class sect1 = section '=';;
+class sect2 = section '-';;
+class sect3 = section ':';;
+
+
+(* Extension for 'p': the paragraph is formatted in a box of its own, which
+ * is as wide as the space available in 'b'. Paragraphs outside of list
+ * items are preceded by a line break.
+ *)
+class p =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      (* the parent must be an element; inside 'li' no line break is
+       * added before the paragraph
+       *)
+      begin match self # node # parent # node_type with
+        T_element "li" -> ()
+      | T_element _ -> b # add_newline
+      | _ -> assert false
+      end;
+      let room = b # available_width in
+      let paragraph = new box room room in
+      paragraph # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store paragraph)
+        (self # node # sub_nodes);
+      b # add_box (paragraph :> formatted_text);
+      b # add_newline
+  end
+;;
+
+
+(* Extension for 'li': the item is formatted in a listitem_box with the
+ * mark - and an indentation of three columns.
+ *)
+class li =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline;
+      let room = b # available_width in
+      let item = new listitem_box "-" 3 room in
+      item # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store item)
+        (self # node # sub_nodes);
+      b # add_box (item :> formatted_text)
+  end
+;;
+
+
+(* Extension for 'code': the character data is added line by line to a box
+ * of its own; every line of the data is added as a single word, so it is
+ * not filled. Tabs are expanded to spaces first, assuming tab stops at
+ * every eighth column.
+ *)
+class code =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline;
+      let w = b # available_width in
+      let b' = new box w w in
+      b' # ignore_space;
+      let data = self # node # data in
+      (* convert tabs; the current line is collected in a buffer, so the
+       * work is linear in the length of the data
+       *)
+      let line = Buffer.create 80 in
+      (* 'column' is the number of characters already in 'line' *)
+      let column = ref 0 in
+      let finish_line () =
+        b' # add_word (Buffer.contents line);
+        b' # add_newline;
+        Buffer.clear line;
+        column := 0
+      in
+      String.iter
+        (fun c ->
+          match c with
+            '\t' ->
+              (* fill up to the next tab stop *)
+              let n = 8 - (!column mod 8) in
+              Buffer.add_string line (String.make n ' ');
+              column := !column + n
+          | '\n' ->
+              finish_line ()
+          | _ ->
+              Buffer.add_char line c;
+              incr column)
+        data;
+      (* the last line need not be terminated by a linefeed *)
+      if Buffer.length line > 0 then
+        finish_line ();
+      b # add_box (b' :> formatted_text);
+      b # add_newline
+  end
+;;
+
+
+(* Extension for 'br': begins a new line in the box *)
+class br =
+  object (self)
+    inherit shared
+
+    method to_box store b =
+      b # add_newline
+  end
+;;
+
+
+(* Extension for 'footnote'. In the running text only the number in
+ * brackets is added (to_box); the footnote text itself is added later,
+ * when the store prints the footnote list (footnote_to_box).
+ *)
+class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_box store b =
+      let number =
+        store # alloc_footnote (self : #shared :> footnote_printer) in
+      let mark = "[" ^ string_of_int number ^ "]" in
+      footnote_number <- number;
+      b # add_space;
+      b # add_word mark
+
+    method footnote_to_box store b =
+      let room = b # available_width in
+      let mark = "[" ^ string_of_int footnote_number ^ "]" in
+      (* the footnote text is indented by six columns *)
+      let entry = new listitem_box mark 6 room in
+      entry # ignore_space;
+      List.iter
+        (fun child -> child # extension # to_box store entry)
+        (self # node # sub_nodes);
+      b # add_box (entry :> formatted_text);
+      b # add_newline;
+      b # add_newline
+
+  end
+;;
+
+
+(* Extension for 'a': the content of the element is added to the box as it
+ * is. If the element has a target (attribute 'href', or else 'readmeref'),
+ * a footnote is allocated that mentions the target.
+ *)
+class a =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+    val mutable a_href = ""
+
+    method to_box store b =
+      (* describe attname prefix otherwise:
+       * the value of the attribute with 'prefix' prepended, or the result
+       * of 'otherwise' if the attribute is implied
+       *)
+      let describe attname prefix otherwise =
+        match self # node # attribute attname with
+          Value v -> prefix ^ v
+        | Valuelist _ -> assert false
+        | Implied_value -> otherwise ()
+      in
+      let href =
+        describe "href" "see "
+          (fun () ->
+            describe "readmeref" "see file " (fun () -> ""))
+      in
+      a_href <- href;
+      List.iter
+        (fun child -> child # extension # to_box store b)
+        (self # node # sub_nodes);
+      if href <> "" then begin
+        let number =
+          store # alloc_footnote (self : #shared :> footnote_printer) in
+        footnote_number <- number;
+        b # add_space;
+        b # add_word ("[" ^ string_of_int number ^ "]")
+      end
+
+    method footnote_to_box store b =
+      if a_href <> "" then begin
+        let room = b # available_width in
+        let mark = "[" ^ string_of_int footnote_number ^ "]" in
+        let entry = new listitem_box mark 6 room in
+        entry # ignore_space;
+        entry # add_word a_href;
+        b # add_box (entry :> formatted_text);
+        b # add_newline;
+        b # add_newline
+      end
+  end
+;;
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+(* tag_map: the specification that tells the parser which extension object
+ * is attached to which node. Data nodes get only_data; the elements listed
+ * in element_alist get the extension named there; all other elements get
+ * no_markup.
+ *)
+let tag_map =
+ make_spec_from_alist
+ ~data_exemplar:(new data_impl (new only_data))
+ ~default_element_exemplar:(new element_impl (new no_markup))
+ ~element_alist:
+ [ "readme", (new element_impl (new readme));
+ "sect1", (new element_impl (new sect1));
+ "sect2", (new element_impl (new sect2));
+ "sect3", (new element_impl (new sect3));
+ "title", (new element_impl (new no_markup));
+ "p", (new element_impl (new p));
+ "br", (new element_impl (new br));
+ "code", (new element_impl (new code));
+ "em", (new element_impl (new no_markup));
+ "ul", (new element_impl (new no_markup));
+ "li", (new element_impl (new li));
+ (* footnote and a have the additional method footnote_to_box, so they
+ * must be coerced to the common type
+ *)
+ "footnote", (new element_impl (new footnote : #shared :> shared));
+ "a", (new element_impl (new a : #shared :> shared));
+ ]
+ ()
+;;
+
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/22 14:34:25 gerd
+ * Using make_spec_from_alist instead of make_spec_from_mapping.
+ *
+ * Revision 1.4 2000/08/18 21:15:25 gerd
+ * Minor updates because of PXP API changes.
+ *
+ * Revision 1.3 2000/07/08 17:58:17 gerd
+ * Updated because of PXP API changes.
+ *
+ * Revision 1.2 2000/06/04 20:25:38 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.1 1999/08/22 22:29:32 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+all: print sort delcol
+
+print: print.ml
+ ocamlfind ocamlc -o print -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 print.ml
+
+sort: sort.ml
+ ocamlfind ocamlc -o sort -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 sort.ml
+
+delcol: delcol.ml
+ ocamlfind ocamlc -o delcol -package pxp -linkpkg -custom \
+ -predicates pxp_without_utf8 delcol.ml
+
+clean:
+ rm -f *.cmo *.cma *.cmi *.cmxa *.a *.o
+
+distclean: clean
+ rm -f *~ print sort delcol
+
+CLEAN: clean
--- /dev/null
+Usage:
+ sort -by phone <sample.xml | print
+
+once sort and print are compiled.
+
+These examples illustrate iter_tree, map_tree and find_element.
+
+
+sort: reads an XML file from stdin, sorts the records, and prints the
+ result as XML.
+delcol: reads an XML file from stdin, deletes a column from all records,
+ and prints the result as XML.
+print: reads an XML file from stdin, and pretty-prints the file
+
+The XML file must not contain a DTD. The programs assume the fixed DTD
+record.dtd.
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list, delete a column, and print it as XML *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+(* delcol col tree:
+ * returns a copy of 'tree' that lacks all elements of type 'col'. Raising
+ * Skip in the ~pre function of map_tree is what drops an element.
+ *)
+let delcol col tree =
+  let drop_or_copy n =
+    match n # node_type with
+      T_element name when name = col ->
+        raise Skip
+    | _ ->
+        n # orphaned_flat_clone
+  in
+  map_tree ~pre:drop_or_copy tree
+;;
+
+
+(* main:
+ * parses the command line, reads the record list from stdin (it is
+ * validated against record.dtd), removes the column selected by -col, and
+ * writes the result as XML to stdout. On errors a message is printed to
+ * stderr, and the program exits with code 1.
+ *)
+let main() =
+  let column = ref "" in
+  Arg.parse
+      [ "-col", Arg.String (fun s -> column := s),
+             " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      (* this program is delcol, not sort *)
+      "usage: delcol [ options ]";
+  if !column = "" then (
+    prerr_endline "Column not specified!";
+    exit 1;
+  );
+  if not(List.mem !column ["last-name"; "first-name"; "phone"]) then (
+    prerr_endline ("Unknown column: " ^ !column);
+    exit 1
+  );
+  try
+    let dtd = parse_dtd_entity default_config (from_file "record.dtd") in
+    let tree =
+      parse_content_entity default_config (from_channel stdin) dtd default_spec
+    in
+    print_endline "<?xml encoding='ISO-8859-1'?>";
+    (delcol !column tree) # write (Out_channel stdout) `Enc_iso88591
+  with
+      x ->
+        (* any exception is reported in readable form *)
+        prerr_endline(string_of_exn x);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/24 09:42:52 gerd
+ * Updated a comment.
+ *
+ * Revision 1.1 2000/08/24 09:39:59 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list structure and print it *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+(* print tree:
+ * prints the records of 'tree' to stdout, one line per column; the
+ * records are separated by empty lines.
+ *)
+let print tree =
+  let show label n =
+    print_endline (label ^ n # data) in
+  (* called when a node is entered: prints the columns *)
+  let on_enter n =
+    match n # node_type with
+      T_element "last-name" -> show "Last name: " n
+    | T_element "first-name" -> show "First name: " n
+    | T_element "phone" -> show "Telephone number: " n
+    | _ -> ()
+  in
+  (* called when a node is left: terminates the record *)
+  let on_leave n =
+    match n # node_type with
+      T_element "record" -> print_newline()
+    | _ -> ()
+  in
+  iter_tree ~pre:on_enter ~post:on_leave tree
+;;
+
+(* main:
+ * reads the record list from stdin (it is validated against record.dtd)
+ * and prints it. On errors a message is printed to stderr, and the
+ * program exits with code 1.
+ *)
+let main() =
+  try
+    let record_dtd =
+      parse_dtd_entity default_config (from_file "record.dtd") in
+    let records =
+      parse_content_entity
+        default_config (from_channel stdin) record_dtd default_spec in
+    print records
+  with
+      err ->
+        prerr_endline(string_of_exn err);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/08/22 21:57:43 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+<!ELEMENT record-list (record*)>
+<!ELEMENT record (last-name?, first-name?, phone?)>
+<!ELEMENT last-name (#PCDATA)>
+<!ELEMENT first-name (#PCDATA)>
+<!ELEMENT phone (#PCDATA)>
--- /dev/null
+<?xml encoding="ISO-8859-1"?>
+<record-list>
+ <record>
+ <last-name>Stolpmann</last-name>
+ <first-name>Gerd</first-name>
+ <phone>997705</phone>
+ </record>
+ <record>
+ <last-name>Smith</last-name>
+ <first-name>Jack</first-name>
+ <phone>12345</phone>
+ </record>
+ <record>
+ <last-name>Ützgür</last-name>
+ <first-name>xxx</first-name>
+ <phone>7654</phone>
+ </record>
+</record-list>
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Read a record-list, sort it, and print it as XML *)
+open Pxp_types;;
+open Pxp_document;;
+open Pxp_yacc;;
+
+(* sort by tree:
+ * returns a copy of 'tree' in which the sub nodes of the 'record-list'
+ * element are sorted by the data of their sub element 'by'. Records that
+ * lack this element are sorted as if it were empty.
+ *)
+let sort by tree =
+  (* the sort key of a record *)
+  let key record =
+    try (find_element by record) # data
+    with Not_found -> ""
+  in
+  let by_key a b =
+    let a_key = key a in
+    let b_key = key b in
+    Pervasives.compare a_key b_key
+  in
+  (* ~post is called with the already copied nodes, so they may be
+   * modified in place
+   *)
+  let reorder n =
+    match n # node_type with
+      T_element "record-list" ->
+        let sorted = List.sort by_key (n # sub_nodes) in
+        n # set_nodes sorted;
+        n
+    | _ ->
+        n
+  in
+  map_tree
+    ~pre:(fun n -> n # orphaned_flat_clone)
+    ~post:reorder
+    tree
+;;
+
+
+(* main:
+ * parses the command line, reads the record list from stdin (it is
+ * validated against record.dtd), sorts it by the column selected by -by
+ * (default: last-name), and writes the result as XML to stdout. On errors
+ * a message is printed to stderr, and the program exits with code 1.
+ *)
+let main() =
+  let criterion = ref "last-name" in
+  let known_columns = ["last-name"; "first-name"; "phone"] in
+  Arg.parse
+      [ "-by", Arg.String (fun s -> criterion := s),
+            " (last-name|first-name|phone)";
+      ]
+      (fun _ -> raise (Arg.Bad "Bad usage"))
+      "usage: sort [ options ]";
+  if not(List.mem !criterion known_columns) then begin
+    prerr_endline ("Unknown criterion: " ^ !criterion);
+    exit 1
+  end;
+  try
+    let record_dtd =
+      parse_dtd_entity default_config (from_file "record.dtd") in
+    let records =
+      parse_content_entity
+        default_config (from_channel stdin) record_dtd default_spec
+    in
+    print_endline "<?xml encoding='ISO-8859-1'?>";
+    (sort !criterion records) # write (Out_channel stdout) `Enc_iso88591
+  with
+      err ->
+        prerr_endline(string_of_exn err);
+        exit 1
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/30 16:05:44 gerd
+ * Minor update
+ *
+ * Revision 1.2 2000/08/24 09:40:11 gerd
+ * Allow that columns are missing.
+ *
+ * Revision 1.1 2000/08/22 21:57:44 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
--- /dev/null
+# make pxpvalidate: make bytecode executable
+# make pxpvalidate.opt: make native executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+pxpvalidate: validate.ml
+ ocamlfind ocamlc -o pxpvalidate -package "pxp" -linkpkg validate.ml
+
+pxpvalidate.opt: validate.ml
+ ocamlfind ocamlopt -o pxpvalidate.opt -package "pxp" -linkpkg validate.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f pxpvalidate pxpvalidate.opt
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* print_error e: prints the exception 'e' in readable form to stdout *)
+let print_error e =
+  let message = string_of_exn e in
+  print_endline message
+;;
+
+(* The object collecting the warnings of the parser: every warning is
+ * printed to stdout, marked as WARNING.
+ *)
+class warner =
+  object
+    method warn message =
+      print_endline ("WARNING: " ^ message)
+  end
+;;
+
+(* parse debug wf iso88591 filename:
+ * parses the file 'filename'. The resulting document is not used; the
+ * function is only called for the errors and warnings.
+ * - debug: the value of debugging_mode in the parser configuration
+ * - wf: if true, the document is parsed with parse_wfdocument_entity;
+ * otherwise with parse_document_entity
+ * - iso88591: if true, the configured encoding is ISO-8859-1, otherwise
+ * UTF-8
+ * Exceptions do not escape: they are printed by print_error, and
+ * error_happened is set.
+ *)
+let parse debug wf iso88591 filename =
+ try
+ (* Parse the document: *)
+ let parse_fn =
+ if wf then parse_wfdocument_entity
+ else
+ (* a fresh hash_index is passed as ~id_index *)
+ let index = new hash_index in
+ parse_document_entity
+ ?transform_dtd:None
+ ~id_index:(index :> 'ext index)
+ in
+ let doc =
+ parse_fn
+ { default_config with
+ debugging_mode = debug;
+ encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+ idref_pass = true;
+ warner = new warner
+ }
+ (from_file filename)
+ default_spec
+ in
+ ()
+ with
+ e ->
+ (* Print error; remember that there was an error *)
+ error_happened := true;
+ print_error e
+;;
+
+
+(* main:
+ * parses the command line and checks all files given as anonymous
+ * arguments, in the order of the command line. Errors are recorded in
+ * error_happened by 'parse'.
+ *)
+let main() =
+  let debug = ref false in
+  let wf = ref false in
+  let iso88591 = ref false in
+  let files = ref [] in
+  (* NOTE: the usage text below is a string literal spanning several
+   * lines; it must not be indented. It is plain text, so it must not
+   * contain markup.
+   *)
+  Arg.parse
+      [ "-d", Arg.Set debug,
+           " turn debugging mode on";
+        "-wf", Arg.Set wf,
+            " check only on well-formedness";
+        "-iso-8859-1", Arg.Set iso88591,
+                    " use ISO-8859-1 as internal encoding instead of UTF-8";
+      ]
+      (fun x -> files := x :: !files)
+      "
+usage: pxpvalidate [options] file ...
+
+- checks the validity of XML documents. See below for list of options.
+
+PXP - The XML parser for Objective Caml
+
+List of options:";
+  (* the files were collected in reverse order *)
+  files := List.rev !files;
+  List.iter (parse !debug !wf !iso88591) !files
+;;
+
+
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/30 15:58:41 gerd
+ * Updated.
+ *
+ * Revision 1.9 2000/07/14 14:57:30 gerd
+ * Updated: warner
+ *
+ * Revision 1.8 2000/07/14 14:13:15 gerd
+ * Cosmetic changes.
+ *
+ * Revision 1.7 2000/07/14 14:11:06 gerd
+ * Updated because of changes of the PXP API.
+ *
+ * Revision 1.6 2000/07/08 21:53:00 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.5 2000/06/04 20:21:55 gerd
+ * Updated to new module names.
+ *
+ * Revision 1.4 2000/05/01 16:44:57 gerd
+ * Added check for ID uniqueness.
+ * Using new error formatter.
+ *
+ * Revision 1.3 1999/11/09 22:27:30 gerd
+ * The programs returns now an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2 1999/09/01 23:09:56 gerd
+ * Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1 1999/08/14 22:20:53 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
--- /dev/null
+# make xmlforms: make bytecode executable
+# make xmlforms.opt: make native executable
+# make clean: remove intermediate files
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+#----------------------------------------------------------------------
+
+.PHONY: xmlforms
+xmlforms:
+ $(MAKE) -f Makefile.code xmlforms
+
+.PHONY: xmlforms.opt
+xmlforms.opt:
+ $(MAKE) -f Makefile.code xmlforms.opt
+
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C styles CLEAN
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~ depend depend.pkg
+ rm -f xmlforms xmlforms.opt
+ $(MAKE) -C styles distclean
+
+
--- /dev/null
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = ds_context.cmo ds_style.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = xmlforms.cma
+XARCHIVE = xmlforms.cmxa
+NAME = xmlforms
+REQUIRES = camltk str pxp
+
+xmlforms: $(ARCHIVE) ds_app.cmo
+ ocamlfind ocamlc -g -o xmlforms -custom -package "$(REQUIRES)" \
+ -linkpkg $(ARCHIVE) ds_app.cmo
+
+# Native-code executable.  The target is named after the file the recipe
+# produces, xmlforms.opt, which is also the target the top-level Makefile
+# invokes.
+xmlforms.opt: $(XARCHIVE) ds_app.cmx
+	ocamlfind ocamlopt -o xmlforms.opt -custom -package "$(REQUIRES)" \
+		-linkpkg $(XARCHIVE) ds_app.cmx
+
+# Alias for the former, misspelled target name, so that existing
+# invocations keep working.
+.PHONY: xmlform.opt
+xmlform.opt: xmlforms.opt
+
+$(ARCHIVE): $(OBJECTS)
+ $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+ $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+ $(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+ $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+.mll.ml:
+ ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
--- /dev/null
+-----------------------------------------------------------------------------
+xmlforms
+-----------------------------------------------------------------------------
+
+THE IDEA:
+
+This example uses XML for two purposes:
+
+- The "story" and layout of the application is specified in XML
+- The data records are stored in XML
+
+An "application" is a set of "masks" or sequences of masks, and every mask
+is thought of as a visible page of the application, containing layout
+elements and functional elements. Layout is specified in TeX-style using
+hboxes, vboxes, hspaces, vspaces. Functional elements are "entries" (input
+box for a string with one line), "textboxes" (input boxes with several
+lines), and buttons.
+
+See styles/ds-style.dtd for the DTD of an application specification, and
+the other xml files in this directory for examples.
+
+The entries and textboxes are bound to "slots", i.e. string variables. If
+the application is started, the slots are read from a file, and if the
+user presses a special "save" button, the slots are stored into this file.
+The format of this data file is again XML; the simplistic DTD can be found
+in styles/ds-object.dtd.
+
+
+THE IMPLEMENTATION:
+
+There is currently a mapping of the specifications to ocamltk, done by a
+program called "xmlforms".
+
+
+HOW TO COMPILE:
+
+It is assumed that "findlib" is present on your system; see ABOUT-FINDLIB
+in the toplevel directory.
+The "pxp" and "camltk" packages must have been installed (see REQUIRES in Makefile.code).
+
+- "make xmlforms" produces a bytecode executable "xmlforms"
+- "make xmlforms.opt" produces a native executable "xmlforms.opt"
+
+Note that you cannot start the executables directly; see the next section.
+
+
+HOW TO START AN APPLICATION:
+
+As "xmlforms" is a generic executable, there is a simple mechanism to bind
+it to a specific instance of an application. For example, in the "styles"
+subdirectory there is the application specification "crazy-style.xml". To
+start it, make a symlink called "crazy" referring to the "xmlforms"
+binary, set the environment variable DATASHEETS to the directory where the
+DTDs and XML files can be found, and start "crazy":
+
+ ln -s ../xmlforms crazy
+ DATASHEETS=. crazy my-record.xml
+
+(If you do not set DATASHEETS, a default directory, normally
+"/opt/xmlforms/lib" is used.)
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Tk
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+open Ds_context
+open Ds_style
+
+
+(* Directory in which the DTDs and style definitions are looked up: the
+ * value of the environment variable DATASHEETS, or a fixed default path
+ * if that variable is not set.
+ *)
+let installdir =
+ try Sys.getenv "DATASHEETS" with
+ Not_found -> "/opt/xmlforms/lib"
+(* System ID of the style definition; initially empty, assigned by main
+ * before the style is parsed. *)
+let style_sysid = ref ""
+(* System ID of the DTD for data records, located in installdir *)
+let object_dtd_sysid = Filename.concat installdir "ds-object.dtd"
+(* Name of the root element that is set on the object DTD *)
+let object_dtd_root = "record"
+
+
+(* Print a description of the exception [e] on stdout, followed by a
+ * newline.  The text is produced by string_of_exn.  The function is not
+ * recursive, so it is defined without the rec keyword.
+ *)
+let print_error e =
+  print_endline (string_of_exn e)
+;;
+
+
+(* [run f arg1 arg2] applies [f] to the two arguments.  Any exception
+ * escaping from [f] is reported with print_error instead of being
+ * propagated to the caller.
+ *)
+let run f arg1 arg2 =
+  try
+    f arg1 arg2
+  with
+    exn -> print_error exn
+;;
+
+
+(* [edit filename cmd] runs the form application on the data file
+ * [filename].  It parses the style definition named by !style_sysid and
+ * the object DTD, opens the Tk toplevel (titled [cmd]), creates the
+ * context, shows the start mask and enters the Tk main loop.
+ *)
+let edit filename cmd =
+ (* read in style definition *)
+ let index = new hash_index in
+ let style =
+ parse_document_entity
+ ~id_index:(index :> 'ext index)
+ default_config
+ (from_file !style_sysid)
+ tag_map
+ in
+ let root = style # root in
+ (* let the extension of the root node prepare itself, given the index *)
+ root # extension # prepare (index :> 'ext index);
+
+ (* read in the DTD for data records and fix its root element *)
+ let obj_dtd =
+ parse_dtd_entity
+ default_config
+ (from_file object_dtd_sysid)
+ in
+ obj_dtd # set_root object_dtd_root;
+
+ let topframe = openTk() in
+ let context = new context filename obj_dtd index root topframe in
+
+ (* fixed window size: propagation is switched off so that the packed
+ * masks do not resize the toplevel *)
+ Toplevel.configure topframe [ Width (Centimeters 20.0);
+ Height (Centimeters 12.0);
+ ];
+ Pack.propagate_set topframe false;
+ Wm.title_set topframe cmd;
+ (* display the start mask, then hand control to Tk *)
+ context # goto (root # extension # start_node_name);
+ mainLoop()
+;;
+
+
+(* Program entry point.  The name under which the program was invoked
+ * selects the style definition (<name>-style.xml in installdir); exactly
+ * one argument, the data file, is expected.  Otherwise a usage message is
+ * printed on stderr and the program exits with code 1.
+ *)
+let main() =
+  let cmd = Filename.basename Sys.argv.(0) in
+  if Array.length Sys.argv = 2 then begin
+    let filename = Sys.argv.(1) in
+    style_sysid := Filename.concat installdir (cmd ^ "-style.xml");
+    run edit filename cmd
+  end
+  else begin
+    prerr_endline ("usage: " ^ cmd ^ " filename");
+    exit(1)
+  end
+;;
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.5 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.4 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.3 2000/05/01 16:48:45 gerd
+ * Using the new error formatter.
+ *
+ * Revision 1.2 1999/12/17 21:34:29 gerd
+ * The name of the root element is set to "record" in the
+ * object_dtd; otherwise the parser would not check that the root
+ * element is the right element.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Pxp_yacc
+
+(* Exemplar nodes: the context class below creates its element and data
+ * nodes from them via create_element and create_data. *)
+let empty_record = new element_impl (Pxp_yacc.default_extension);;
+let empty_dnode = new data_impl Pxp_yacc.default_extension;;
+
+(* A context holds the state of one running application: the data record
+ * being edited (obj), the history of visited masks, and the widget of the
+ * mask that is currently displayed.
+ *
+ * the_filename: file the record is loaded from (by the initializer) and
+ *               saved to (by save_obj)
+ * the_obj_dtd:  DTD used for parsing and for creating record nodes
+ * the_index:    index used to look up masks by name
+ * the_root:     root node of the style definition
+ * the_topframe: widget inside which the mask widgets are created
+ *)
+class context the_filename the_obj_dtd the_index the_root the_topframe =
+  object (self)
+    val filename = the_filename
+    val obj_dtd = the_obj_dtd
+    val node_index = the_index
+    val mutable obj = empty_record # create_element
+                        the_obj_dtd (T_element "record") []
+    val root = the_root
+    val topframe = the_topframe
+    val mutable wdg = None
+
+    (* names of the visited masks, and the position of the current one *)
+    val mutable history = ( [| |] : string array )
+    val mutable index = 0
+
+    initializer
+      self # load_obj
+
+    method obj = obj
+
+    (* history *)
+
+    (* Destroy the widget of the current mask, if there is one *)
+    method private leave_node =
+      begin match wdg with
+          None -> ()
+        | Some w -> Tk.destroy w
+      end;
+      wdg <- None
+
+    (* Create and pack the widget for the mask at history.(index).
+     * Fails with "Mask not found: ..." if the index does not know it. *)
+    method private enter_node =
+      let where = history.(index) in
+      let n =
+        try node_index # find where with
+            Not_found -> failwith ("Mask not found: " ^ where) in
+      let w = n # extension # create_widget topframe self in
+      Tk.pack [w] (n # extension # pack_opts @ [ Tk.Expand true] );
+      wdg <- Some w
+
+
+    (* Step back in the history; raises Not_found at its beginning *)
+    method previous =
+      if index > 0 then
+        index <- index - 1
+      else
+        raise Not_found;
+      self # leave_node;
+      self # enter_node
+
+
+    (* Step forward in the history; raises Not_found at its end *)
+    method next =
+      if index < Array.length history - 1 then
+        index <- index + 1
+      else
+        raise Not_found;
+      self # leave_node;
+      self # enter_node
+
+
+    (* Display the mask [where].  The history entries after the current
+     * position are dropped, and [where] becomes the new last entry. *)
+    method goto where =
+      assert (index <= Array.length history);
+      self # leave_node;
+      let persisting_history =
+        if index < Array.length history then
+          Array.sub history 0 (index+1)
+        else
+          history
+      in
+      history <- Array.concat [ persisting_history; [| where |] ];
+      index <- Array.length history - 1;
+      self # enter_node
+
+
+    (* Name of the current mask; raises Not_found if there is none *)
+    method current =
+      if index < Array.length history then
+        history.(index)
+      else
+        raise Not_found
+
+
+    (* read, write the slots of object *)
+
+    (* Depth-first search for the "string" element whose "name" attribute
+     * is [name]; raises Not_found if there is no such element. *)
+    method search_slot name =
+      let rec search n =
+        match n # node_type with
+            T_element "string" ->
+              if n # required_string_attribute "name" = name then
+                n
+              else raise Not_found
+          | T_element _ ->
+              search_list (n # sub_nodes)
+          | T_data ->
+              raise Not_found
+          | _ ->
+              assert false
+
+      and search_list l =
+        match l with
+            x :: l' ->
+              (try search x with Not_found -> search_list l')
+          | [] ->
+              raise Not_found
+      in
+      search obj
+
+    (* Contents of the slot [name]; raises Not_found if it does not exist *)
+    method get_slot name =
+      let d = (self # search_slot name) # data in
+      d
+
+    (* Store [value] in the slot [name]: an existing "string" element of
+     * that name is deleted, and a new one is appended to the record. *)
+    method set_slot name value =
+      let dtd = obj # dtd in
+      begin try
+        let n = self # search_slot name in
+        n # delete
+      with
+          Not_found -> ()
+      end;
+      let e_string = empty_record # create_element dtd (T_element "string")
+                       [ "name", name ] in
+      let dnode = empty_dnode # create_data dtd value in
+      e_string # add_node dnode;
+      e_string # local_validate();
+      obj # add_node e_string;
+      assert(self # get_slot name = value)
+
+    (* load, save object *)
+
+
+    (* Read the record from the file if it exists; otherwise keep the
+     * empty record and print a notice on stdout. *)
+    method load_obj =
+      if Sys.file_exists filename then begin
+        obj <- parse_content_entity
+                 default_config
+                 (from_file filename)
+                 obj_dtd
+                 default_spec
+      end
+      else begin
+        print_string "New file!\n";
+        flush stdout
+      end
+
+
+    (* Write the record as an XML document to the file.  The channel is
+     * closed on success and on failure; exceptions are re-raised. *)
+    method save_obj =
+      let fd = open_out filename in
+      try
+
+        let re_amp = Str.regexp "&" in
+        let re_lt = Str.regexp "<" in
+        let re_apos = Str.regexp "'" in
+        let re_gt = Str.regexp ">" in
+        (* Replace the markup characters of [s] by entity references so
+         * that [s] can be written as character data or inside a
+         * single-quoted attribute value.  The ampersand is handled first;
+         * otherwise the ampersands introduced by the later replacements
+         * would be escaped a second time.
+         *)
+        let protect s =
+          let s1 = Str.global_replace re_amp "&amp;" s in
+          let s2 = Str.global_replace re_lt "&lt;" s1 in
+          let s3 = Str.global_replace re_apos "&apos;" s2 in
+          Str.global_replace re_gt "&gt;" s3
+        in
+
+        let rec iterate (n : 'node extension node as 'node) =
+          match n # node_type with
+              T_data ->
+                output_string fd (protect (n # data))
+            | T_element name ->
+                (* line breaks are put inside the tags only, where they
+                 * are not part of the character data *)
+                output_string fd ("<" ^ name ^ "\n");
+                let anames = n # attribute_names in
+                List.iter
+                  (fun aname ->
+                     let aval = n # attribute aname in
+                     let v =
+                       match aval with
+                           Value s ->
+                             aname ^ "='" ^ protect s ^ "'\n"
+                         | Valuelist l ->
+                             aname ^ "='" ^ String.concat " " (List.map protect l) ^ "'\n"
+                         | Implied_value ->
+                             ""
+                     in
+                     output_string fd v)
+                  anames;
+                output_string fd ">";
+                List.iter iterate (n # sub_nodes);
+                output_string fd ("</" ^ name ^ "\n>");
+            | _ ->
+                assert false
+        in
+
+        output_string fd "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
+        iterate obj;
+        close_out fd
+      with
+          e ->
+            close_out fd;
+            raise e
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/30 15:58:49 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/07/23 20:25:05 gerd
+ * Update because of API change: local_validate.
+ *
+ * Revision 1.5 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.4 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.3 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/30 00:09:08 gerd
+ * Minor fix.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Ds_context
+
+
+(* [get_dimension s] converts a dimension such as 12px, 2.5cm, 1in, 3mm or
+ * 10pt into the corresponding Tk unit value.  The number may be followed
+ * by whitespace before the unit; for px it is truncated to an integer.
+ *
+ * Fails with "Bad dimension: ..." if [s] does not start with a number
+ * and a known unit.
+ *)
+let get_dimension s =
+  (* Group 1: the number, group 3: the unit.  The decimal point is escaped
+   * so that it no longer matches an arbitrary character.  The optional
+   * sign is accepted explicitly because the former pattern accepted it
+   * by way of the unescaped dot.
+   *)
+  let re = Str.regexp "\\([-+]?[0-9]*\\(\\.[0-9]+\\)?\\)[ \t\n]*\\(px\\|cm\\|in\\|mm\\|pt\\)" in
+  if Str.string_match re s 0 then begin
+    let number = Str.matched_group 1 s in
+    let dim = Str.matched_group 3 s in
+    (* The pattern also matches an empty number; report that, like any
+     * other unparsable number, as a bad dimension *)
+    let value =
+      try float_of_string number with
+          Failure _ -> failwith ("Bad dimension: " ^ s) in
+    match dim with
+        "px" -> Tk.Pixels (int_of_float value)
+      | "cm" -> Tk.Centimeters value
+      | "in" -> Tk.Inches value
+      | "mm" -> Tk.Millimeters value
+      | "pt" -> Tk.PrinterPoint value
+      | _ -> assert false
+  end
+  else
+    failwith ("Bad dimension: " ^ s)
+;;
+
+
+(* Base class of all node extensions of the style definition.  It stores
+ * the node the extension is attached to, implements the inherited
+ * attributes fgcolor, bgcolor and font, and declares the methods every
+ * concrete extension must define: prepare and create_widget.
+ *)
+class virtual shared =
+ object(self)
+
+ (* --- default_ext --- *)
+
+ val mutable node = (None : shared node option)
+
+ method clone = {< >}
+ (* The node this extension belongs to; it is a programming error
+ * (assert false) to ask before set_node has been called *)
+ method node =
+ match node with
+ None ->
+ assert false
+ | Some n -> n
+ method set_node n =
+ node <- Some n
+
+ (* --- shared attributes: color & font settings --- *)
+
+ val mutable fgcolor = (None : string option)
+ val mutable bgcolor = (None : string option)
+ val mutable font = (None : string option)
+
+ method fgcolor =
+ (* Get the foreground color: If there is a local value, return it;
+ * otherwise ask parent node
+ *)
+ match fgcolor with
+ Some c -> c
+ | None -> try self # node # parent # extension # fgcolor with
+ Not_found -> failwith "#fgcolor"
+
+ method bgcolor =
+ (* Get the background color: If there is a local value, return it;
+ * otherwise ask parent node
+ *)
+ match bgcolor with
+ Some c -> c
+ | None -> try self # node # parent # extension # bgcolor with
+ Not_found -> failwith "#bgcolor"
+
+ method font =
+ (* Get the current font: If there is a local value, return it;
+ * otherwise ask parent node
+ *)
+ match font with
+ Some c -> c
+ | None -> try self # node # parent # extension # font with
+ Not_found -> failwith "#font"
+
+ (* Read the local values of the three attributes from the node; a
+ * missing or implied attribute yields None *)
+ method private init_color_and_font =
+ let get_color n =
+ try
+ match self # node # attribute n with
+ Value v -> Some v
+ | Implied_value -> None
+ | _ -> assert false
+ with Not_found -> None in
+ fgcolor <- get_color "fgcolor";
+ bgcolor <- get_color "bgcolor";
+ font <- get_color "font"; (* sic! *)
+
+
+ (* Tk option lists for the effective colors and the effective font *)
+ method private bg_color_opt =
+ [ Tk.Background (Tk.NamedColor (self # bgcolor)) ]
+
+ method private fg_color_opt =
+ [ Tk.Foreground (Tk.NamedColor (self # fgcolor)) ]
+
+ method private font_opt =
+ [ Tk.Font (self # font) ]
+
+ (* --- virtual --- *)
+
+ method virtual prepare : shared Pxp_yacc.index -> unit
+ method virtual create_widget : Widget.widget -> context -> Widget.widget
+
+ (* Defaults: no extra pack options, not stretchable, and nothing to
+ * store into the context *)
+ method pack_opts = ( [] : Tk.options list )
+ method xstretchable = false
+ method ystretchable = false
+
+ method accept (c:context) = ()
+
+ method private get_mask =
+ (* find parent which is a mask *)
+ let rec search n =
+ match n # node_type with
+ T_element "mask" ->
+ n # extension
+ | T_element _ ->
+ search (n # parent)
+ | _ ->
+ assert false
+ in
+ search (self # node)
+
+
+ (* Call accept on every node of the subtree of the enclosing mask *)
+ method private accept_mask (c:context) =
+ let rec iterate n =
+ n # extension # accept c;
+ List.iter iterate (n # sub_nodes)
+ in
+ iterate (self # get_mask # node)
+
+
+ (* Fails by default; the application class defines it *)
+ method start_node_name =
+ (failwith "#start_node_name" : string)
+
+ (* --- debug --- *)
+
+ method private name =
+ let nt = self # node # node_type in
+ match nt with
+ T_element n -> n
+ | T_data -> "#PCDATA"
+ | _ -> assert false
+
+ end
+;;
+
+
+(* Extension without a visual representation.  Preparing it only reads
+ * the color and font attributes; asking it for a widget is an error.
+ *)
+class default =
+  object (self)
+    inherit shared
+
+    method prepare _index =
+      self # init_color_and_font
+
+    method create_widget _parent _context =
+      failwith "default # create_widget"
+  end
+;;
+
+
+(* Placeholder node; the application class uses it as the initial value
+ * of start_node until prepare has looked up the real start node. *)
+let dummy_node = new element_impl (new default);;
+
+(* Extension of the application element, the root of a style definition.
+ * It supplies the default colors and font, remembers the start node named
+ * by the start attribute, and delegates widget creation to that node.
+ *)
+class application =
+  object (self)
+    inherit shared
+
+    val mutable start_node = dummy_node
+
+    method prepare idx =
+      (* Own attributes first; whatever is still unset gets a default *)
+      self # init_color_and_font;
+      (match fgcolor with
+           None -> fgcolor <- Some "black"
+         | Some _ -> ());
+      (match bgcolor with
+           None -> bgcolor <- Some "white"
+         | Some _ -> ());
+      (match font with
+           None -> font <- Some "fixed"
+         | Some _ -> ());
+      let start_name =
+        match self # node # attribute "start" with
+            Value v -> v
+          | _ -> assert false
+      in
+      start_node <-
+        (try idx # find start_name with
+             Not_found -> failwith "Start node not found");
+      (* Then prepare all nodes below this one, parents before children *)
+      let rec prepare_tree n =
+        n # extension # prepare idx;
+        List.iter prepare_tree (n # sub_nodes)
+      in
+      List.iter prepare_tree (self # node # sub_nodes)
+
+
+    method start_node_name =
+      match self # node # attribute "start" with
+          Value name -> name
+        | _ -> assert false
+
+    method create_widget parent ctx =
+      start_node # extension # create_widget parent ctx
+
+    method pack_opts =
+      start_node # extension # pack_opts
+  end
+;;
+
+
+(* Extension of the sequence element: it is displayed as its first
+ * sub node.
+ *)
+class sequence =
+  object (self)
+    inherit shared
+
+    method prepare _index =
+      self # init_color_and_font
+
+    method create_widget parent ctx =
+      let first = List.hd (self # node # sub_nodes) in
+      first # extension # create_widget parent ctx
+
+    method pack_opts =
+      let first = List.hd (self # node # sub_nodes) in
+      first # extension # pack_opts
+  end
+;;
+
+
+(* Extension of the vbox element: a frame into which the sub nodes are
+ * packed one after the other, anchored according to the halign attribute
+ * (left, right or center).
+ *)
+class vbox =
+  object (self)
+    inherit shared
+
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      (match self # node # attribute "halign" with
+           Value align -> att_halign <- align
+         | _ -> assert false)
+
+    method create_widget w c =
+      let frame = Frame.create w (self # bg_color_opt) in
+      let anchor =
+        match att_halign with
+            "left" -> Tk.W
+          | "right" -> Tk.E
+          | "center" -> Tk.Center
+          | _ -> assert false
+      in
+      (* The pack options of a child come after the anchor of the box *)
+      let place child =
+        let child_opts = child # extension # pack_opts in
+        let child_wdg = child # extension # create_widget frame c in
+        Tk.pack [child_wdg] (Tk.Anchor anchor :: child_opts)
+      in
+      List.iter place (self # node # sub_nodes);
+      frame
+
+    (* The box fills in every direction in which a sub node can stretch *)
+    method pack_opts =
+      let fill_x = self # xstretchable in
+      let fill_y = self # ystretchable in
+      if fill_x && fill_y then [ Tk.Fill Tk.Fill_Both ]
+      else if fill_x then [ Tk.Fill Tk.Fill_X ]
+      else if fill_y then [ Tk.Fill Tk.Fill_Y ]
+      else []
+
+    method xstretchable =
+      List.exists
+        (fun child -> child # extension # xstretchable)
+        (self # node # sub_nodes)
+
+    method ystretchable =
+      List.exists
+        (fun child -> child # extension # ystretchable)
+        (self # node # sub_nodes)
+
+  end
+
+;;
+
+
+(* Extension of the mask element: a vbox that does not read the halign
+ * attribute but always aligns to the left.
+ *)
+class mask =
+  object (self)
+
+    inherit vbox
+
+    method prepare _index =
+      self # init_color_and_font;
+      att_halign <- "left"
+  end
+;;
+
+
+(* Extension of the hbox element: the sub nodes are packed side by side
+ * into an inner frame.  If the width attribute is given, a canvas of
+ * that width and of height zero is packed together with the inner frame
+ * into the outer frame.
+ *)
+class hbox =
+  object (self)
+    inherit shared
+
+    val mutable att_width = None
+    val mutable att_halign = "left"
+    val mutable att_valign = "top"
+
+    method prepare idx =
+      self # init_color_and_font;
+      (match self # node # attribute "halign" with
+           Value align -> att_halign <- align
+         | _ -> assert false);
+      (match self # node # attribute "valign" with
+           Value align -> att_valign <- align
+         | _ -> assert false);
+      (match self # node # attribute "width" with
+           Value dim -> att_width <- Some (get_dimension dim)
+         | Implied_value -> att_width <- None
+         | _ -> assert false)
+
+    method create_widget w c =
+      let outer = Frame.create w (self # bg_color_opt) in
+      let struts =
+        match att_width with
+            None -> []
+          | Some wd ->
+              let strut_opts =
+                [ Tk.Width wd; Tk.Height (Tk.Pixels 0);
+                  Tk.Relief Tk.Flat;
+                  Tk.HighlightThickness (Tk.Pixels 0);
+                ] @
+                self # bg_color_opt in
+              [ Canvas.create outer strut_opts ]
+      in
+      let inner = Frame.create outer (self # bg_color_opt) in
+
+      (* halign positions the inner frame, valign the sub nodes in it *)
+      let outer_anchor =
+        match att_halign with
+            "left" -> Tk.W
+          | "right" -> Tk.E
+          | "center" -> Tk.Center
+          | _ -> assert false
+      in
+      let inner_anchor =
+        match att_valign with
+            "top" -> Tk.N
+          | "bottom" -> Tk.S
+          | "center" -> Tk.Center
+          | _ -> assert false
+      in
+      let place child =
+        let child_opts = child # extension # pack_opts in
+        let child_wdg = child # extension # create_widget inner c in
+        Tk.pack
+          [child_wdg]
+          (Tk.Anchor inner_anchor :: Tk.Side Tk.Side_Left :: child_opts)
+      in
+      List.iter place (self # node # sub_nodes);
+      let own_opts = self # pack_opts in
+      Tk.pack (struts @ [inner]) (Tk.Anchor outer_anchor :: own_opts);
+      outer
+
+    (* The box fills in every direction in which a sub node can stretch *)
+    method pack_opts =
+      let fill_x = self # xstretchable in
+      let fill_y = self # ystretchable in
+      if fill_x && fill_y then [ Tk.Fill Tk.Fill_Both ]
+      else if fill_x then [ Tk.Fill Tk.Fill_X ]
+      else if fill_y then [ Tk.Fill Tk.Fill_Y ]
+      else []
+
+    method xstretchable =
+      List.exists
+        (fun child -> child # extension # xstretchable)
+        (self # node # sub_nodes)
+
+    method ystretchable =
+      List.exists
+        (fun child -> child # extension # ystretchable)
+        (self # node # sub_nodes)
+
+  end
+;;
+
+(* Extension of the vspace element: vertical space of the height given by
+ * the height attribute.  With fill set to yes the space is stretchable
+ * in the vertical direction.
+ *)
+class vspace =
+  object (self)
+    inherit shared
+
+    val mutable att_height = Tk.Pixels 0
+    val mutable att_fill = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      (match self # node # attribute "height" with
+           Value dim -> att_height <- get_dimension dim
+         | _ -> assert false);
+      (match self # node # attribute "fill" with
+           Value "yes" -> att_fill <- true
+         | Value "no" -> att_fill <- false
+         | _ -> assert false)
+
+
+    method create_widget w c =
+      let frame = Frame.create w ( self # bg_color_opt ) in
+      (* The strut is a canvas of zero width and without any border *)
+      let strut_opts =
+        [ Tk.Height att_height; Tk.Width (Tk.Pixels 0);
+          Tk.Relief Tk.Flat;
+          Tk.HighlightThickness (Tk.Pixels 0);
+        ] @
+        self # bg_color_opt in
+      let strut = Canvas.create frame strut_opts in
+      let strut_pack =
+        if att_fill then [Tk.Fill Tk.Fill_Y; Tk.Expand true] else [] in
+      Tk.pack [strut] strut_pack;
+      frame
+
+    method pack_opts =
+      match att_fill with
+          true -> [ Tk.Fill Tk.Fill_Y; Tk.Expand true ]
+        | false -> []
+
+    method ystretchable = att_fill
+  end
+;;
+
+(* Extension of the hspace element: horizontal space of the width given
+ * by the width attribute.  With fill set to yes the space is stretchable
+ * in the horizontal direction.
+ *)
+class hspace =
+  object (self)
+    inherit shared
+
+
+    val mutable att_width = Tk.Pixels 0
+    val mutable att_fill = false
+
+    method prepare idx =
+      self # init_color_and_font;
+      (match self # node # attribute "width" with
+           Value dim -> att_width <- get_dimension dim
+         | _ -> assert false);
+      (match self # node # attribute "fill" with
+           Value "yes" -> att_fill <- true
+         | Value "no" -> att_fill <- false
+         | _ -> assert false)
+
+
+    method create_widget w c =
+      let frame = Frame.create w ( self # bg_color_opt ) in
+      (* The strut is a canvas of zero height and without any border *)
+      let strut_opts =
+        [ Tk.Width att_width; Tk.Height (Tk.Pixels 0);
+          Tk.Relief Tk.Flat;
+          Tk.HighlightThickness (Tk.Pixels 0);
+        ] @
+        self # bg_color_opt in
+      let strut = Canvas.create frame strut_opts in
+      let strut_pack =
+        if att_fill then [Tk.Fill Tk.Fill_X; Tk.Expand true] else [] in
+      Tk.pack [strut] strut_pack;
+      frame
+
+    method pack_opts =
+      match att_fill with
+          true -> [ Tk.Fill Tk.Fill_X; Tk.Expand true ]
+        | false -> []
+
+    method xstretchable = att_fill
+  end
+;;
+
+(* Extension of the label element: a Tk label showing the character data
+ * of the node.  The attribute textwidth is optional (a negative value
+ * stands for not set); halign selects the anchor of the text.
+ *)
+class label =
+  object (self)
+    inherit shared
+
+    val mutable att_textwidth = (-1)
+    val mutable att_halign = "left"
+
+    method prepare idx =
+      self # init_color_and_font;
+      let width =
+        match self # node # attribute "textwidth" with
+            Value v ->
+              (try int_of_string v
+               with _ -> failwith ("Not an integer: " ^ v))
+          | Implied_value -> (-1)
+          | _ -> assert false
+      in
+      att_textwidth <- width;
+      let align =
+        match self # node # attribute "halign" with
+            Value v -> v
+          | _ -> assert false
+      in
+      att_halign <- align
+
+
+    method create_widget w c =
+      let width_opts =
+        if att_textwidth >= 0 then [ Tk.TextWidth att_textwidth ] else [] in
+      let anchor =
+        match att_halign with
+            "left" -> Tk.W
+          | "right" -> Tk.E
+          | "center" -> Tk.Center
+          | _ -> assert false
+      in
+      let text_opts = [ Tk.Text (self # node # data) ] in
+      Label.create w
+        (width_opts @ [ Tk.Anchor anchor ] @ text_opts @
+         self # bg_color_opt @ self # fg_color_opt @ self # font_opt)
+
+  end
+;;
+
+(* Extension of the entry element: a one-line input field bound to the
+ * slot named by the slot attribute.  The field is initialised from the
+ * slot or, if the slot does not exist, from the data of the node; accept
+ * writes the contents of the field back into the slot.
+ *)
+class entry =
+  object (self)
+    inherit shared
+
+    (* The text variable is created lazily, on first use *)
+    val mutable tv = lazy (Textvariable.create())
+    val mutable att_textwidth = (-1)
+    val mutable att_slot = ""
+
+    method prepare idx =
+      self # init_color_and_font;
+      tv <- lazy (Textvariable.create());
+      let width =
+        match self # node # attribute "textwidth" with
+            Value v ->
+              (try int_of_string v
+               with _ -> failwith ("Not an integer: " ^ v))
+          | Implied_value -> (-1)
+          | _ -> assert false
+      in
+      att_textwidth <- width;
+      let slot =
+        match self # node # attribute "slot" with
+            Value v -> v
+          | _ -> assert false
+      in
+      att_slot <- slot
+
+    method create_widget w c =
+      let width_opts =
+        if att_textwidth >= 0 then [ Tk.TextWidth att_textwidth ] else [] in
+      let field =
+        Entry.create w
+          ( Tk.TextVariable (Lazy.force tv) ::
+            (self # fg_color_opt @
+             self # bg_color_opt @
+             self # font_opt @
+             width_opts) )
+      in
+      let initial =
+        try c # get_slot att_slot with
+            Not_found -> self # node # data
+      in
+      Textvariable.set (Lazy.force tv) initial;
+      field
+
+    method accept c =
+      let contents = Textvariable.get (Lazy.force tv) in
+      c # set_slot att_slot contents
+
+  end
+;;
+
+(* Extension of the textbox element: a multi-line text widget with a
+ * vertical scrollbar.  If the slot attribute is given, the box is an
+ * input field bound to that slot; without a slot it displays the data of
+ * the node and is disabled.  The attributes textwidth and textheight are
+ * optional (a negative value stands for not set).
+ *)
+class textbox =
+  object (self)
+    inherit shared
+
+    val mutable att_textwidth = (-1)
+    val mutable att_textheight = (-1)
+    val mutable att_slot = ""
+    (* the text widget created last; accept reads its contents *)
+    val mutable last_widget = None
+
+    method prepare idx =
+      self # init_color_and_font;
+      att_textwidth <- (match self # node # attribute "textwidth" with
+                            Value v ->
+                              let w = try int_of_string v
+                                      with _ -> failwith ("Not an integer: " ^ v) in
+                              w
+                          | Implied_value ->
+                              (-1)
+                          | _ -> assert false);
+      att_textheight <- (match self # node # attribute "textheight" with
+                             Value v ->
+                               let w = try int_of_string v
+                                       with _ -> failwith ("Not an integer: " ^ v) in
+                               w
+                           | Implied_value ->
+                               (-1)
+                           | _ -> assert false);
+      att_slot <- (match self # node # attribute "slot" with
+                       Value v -> v
+                     | Implied_value -> ""
+                     | _ -> assert false)
+
+
+    method create_widget w c =
+      let opts_textwidth = if att_textwidth < 0 then [] else
+                             [ Tk.TextWidth att_textwidth ] in
+      let opts_textheight = if att_textheight < 0 then [] else
+                              [ Tk.TextHeight att_textheight ] in
+      let f = Frame.create w (self # bg_color_opt) in
+      let vscrbar = Scrollbar.create f [ Tk.Orient Tk.Vertical ] in
+      let e = Text.create f ( [ ] @
+                              self # fg_color_opt @
+                              self # bg_color_opt @
+                              self # font_opt @
+                              opts_textwidth @ opts_textheight
+                            ) in
+      last_widget <- Some e;
+      (* connect scrollbar and text widget in both directions *)
+      Scrollbar.configure vscrbar [ Tk.ScrollCommand
+                                      (fun s -> Text.yview e s);
+                                    Tk.Width (Tk.Pixels 9) ];
+      Text.configure e [ Tk.YScrollCommand
+                           (fun a b -> Scrollbar.set vscrbar a b) ];
+      let s =
+        if att_slot <> "" then
+          try c # get_slot att_slot with
+              Not_found -> self # node # data
+        else
+          self # node # data
+      in
+      (* Text.insert appends always a newline to the last line; so strip
+       * an existing newline first
+       *)
+      let s' =
+        (* && is the current spelling of the deprecated operator &; the
+         * short-circuit evaluation protects the index expression *)
+        if s <> "" && s.[String.length s - 1] = '\n' then
+          String.sub s 0 (String.length s - 1)
+        else
+          s in
+      Text.insert e (Tk.TextIndex(Tk.End,[])) s' [];
+      if att_slot = "" then
+        Text.configure e [ Tk.State Tk.Disabled ];
+      Tk.pack [e] [ Tk.Side Tk.Side_Left ];
+      Tk.pack [vscrbar] [ Tk.Side Tk.Side_Left; Tk.Fill Tk.Fill_Y ];
+      f
+
+    (* Store the whole text of the widget into the slot, if there is a
+     * slot and a widget has been created *)
+    method accept c =
+      if att_slot <> "" then
+        match last_widget with
+            None -> ()
+          | Some w ->
+              let s =
+                Text.get
+                  w
+                  (Tk.TextIndex(Tk.LineChar(1,0),[]))
+                  (Tk.TextIndex(Tk.End,[])) in
+              c # set_slot att_slot s
+
+  end
+;;
+
+(* Extension of the button element.  The action attribute selects what
+ * happens when the button is pressed: goto, save, exit, save-exit,
+ * list-prev, list-next, hist-prev or hist-next; any other value does
+ * nothing.  In every case the input of the enclosing mask is first
+ * stored into the context.
+ *)
+class button =
+  object (self)
+    inherit shared
+
+    val mutable att_label = ""
+    val mutable att_action = ""
+    val mutable att_goto = ""
+
+    (* Read the attributes and check them: the target of a goto action
+     * must exist in the index, and the list actions are only allowed for
+     * masks that are direct children of a sequence element.
+     *)
+    method prepare idx =
+      self # init_color_and_font;
+      att_label <- (match self # node # attribute "label" with
+                        Value v -> v
+                      | _ -> assert false);
+      att_action <- (match self # node # attribute "action" with
+                         Value v -> v
+                       | _ -> assert false);
+      att_goto <- (match self # node # attribute "goto" with
+                       Value v -> v
+                     | Implied_value -> ""
+                     | _ -> assert false);
+      if att_action = "goto" then begin
+        try let _ = idx # find att_goto in () with
+            Not_found -> failwith ("Target `" ^ att_goto ^ "' not found")
+      end;
+      (* || is the current spelling of the deprecated operator or *)
+      if att_action = "list-prev" || att_action = "list-next" then begin
+        let m = self # get_mask in
+        if m # node # parent # node_type <> T_element "sequence" then
+          failwith ("action " ^ att_action ^ " must not be used out of <sequence>");
+      end
+
+
+    method create_widget w c =
+      let cmd () =
+        self # accept_mask c;
+        match att_action with
+            "goto" ->
+              c # goto att_goto
+          | "save" ->
+              c # save_obj
+          | "exit" ->
+              Protocol.closeTk()
+          | "save-exit" ->
+              c # save_obj;
+              Protocol.closeTk()
+          | "list-prev" ->
+              (* go to the sibling directly before the enclosing mask;
+               * nothing happens if the mask is the first one *)
+              let m = self # get_mask # node in
+              let seq = m # parent in
+              let rec search l =
+                match l with
+                    x :: y :: l' ->
+                      if y == m then
+                        (match x # attribute "name" with
+                             Value name -> c # goto name
+                           | _ -> assert false)
+                      else
+                        search (y :: l')
+                  | _ -> ()
+              in
+              search (seq # sub_nodes)
+          | "list-next" ->
+              (* go to the sibling directly after the enclosing mask;
+               * nothing happens if the mask is the last one *)
+              let m = self # get_mask # node in
+              let seq = m # parent in
+              let rec search l =
+                match l with
+                    x :: y :: l' ->
+                      if x == m then
+                        (match y # attribute "name" with
+                             Value name -> c # goto name
+                           | _ -> assert false)
+                      else
+                        search (y :: l')
+                  | _ -> ()
+              in
+              search (seq # sub_nodes)
+          | "hist-prev" ->
+              (try c # previous with Not_found -> ())
+          | "hist-next" ->
+              (try c # next with Not_found -> ())
+          | _ -> ()
+      in
+      let b = Button.create w ( [ Tk.Text att_label; Tk.Command cmd ] @
+                                self # fg_color_opt @
+                                self # bg_color_opt @
+                                self # font_opt ) in
+      b
+
+
+  end
+;;
+
+
+(**********************************************************************)
+
+open Pxp_yacc
+
+(* The specification that maps element types to exemplar nodes carrying
+ * the matching extension class.  Data nodes and elements without an
+ * entry of their own get the default extension.
+ *)
+let tag_map =
+  let exemplars = Hashtbl.create 50 in
+  let register element_type ext =
+    Hashtbl.add exemplars element_type (new element_impl ext) in
+  register "application" (new application);
+  register "sequence" (new sequence);
+  register "mask" (new mask);
+  register "vbox" (new vbox);
+  register "hbox" (new hbox);
+  register "vspace" (new vspace);
+  register "hspace" (new hspace);
+  register "label" (new label);
+  register "entry" (new entry);
+  register "textbox" (new textbox);
+  register "button" (new button);
+  make_spec_from_mapping
+    ~data_exemplar:(new data_impl (new default))
+    ~default_element_exemplar:(new element_impl (new default))
+    ~element_mapping:exemplars
+    ()
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/08/30 15:58:49 gerd
+ * Updated.
+ *
+ * Revision 1.4 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.3 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.2 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+
+.PHONY: symlinks
+symlinks:
+ for x in *-style.xml; do ln -s ../xmlforms $${x%-style.xml} || true; done
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!DOCTYPE application SYSTEM "ds-style.dtd" [
+<!ENTITY h1.font '-*-helvetica-bold-r-*-*-18-*-*-*-*-*-*-*'>
+<!ENTITY h2.font '-*-helvetica-bold-r-*-*-14-*-*-*-*-*-*-*'>
+<!ENTITY h3.font '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY dfl.font '-*-helvetica-medium-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY dfl.bold.font '-*-helvetica-bold-r-*-*-12-*-*-*-*-*-*-*'>
+<!ENTITY in.font '-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*'>
+
+<!ENTITY bg.button 'lightblue'>
+<!ENTITY bg.hilfe '#E0E0E0'>
+<!ENTITY fg.hilfe 'black'>
+
+<!ENTITY headline
+ '<vbox>
+ <label font="&h2.font;" fgcolor="darkgreen">More about person...</label>
+ <vspace height="2mm"/>
+ <hbox>
+ <hbox bgcolor="black">
+ <hspace width="18cm"/>
+ <vbox><vspace height="2px"/></vbox>
+ </hbox>
+ <hspace width="5mm"/>
+ </hbox>
+ <vspace height="3mm"/>
+ </vbox>'>
+
+<!ENTITY help.headline
+ '<vbox>
+ <label font="&h2.font;" fgcolor="darkgreen">Help</label>
+ <vspace height="2mm"/>
+ <hbox>
+ <hbox bgcolor="black">
+ <hspace width="18cm"/>
+ <vbox><vspace height="2px"/></vbox>
+ </hbox>
+ <hspace width="5mm"/>
+ </hbox>
+ <vspace height="3mm"/>
+ </vbox>'>
+
+<!ENTITY info.headline
+ '<vbox>
+ <label font="&h2.font;" fgcolor="darkgreen">About xmlforms</label>
+ <vspace height="2mm"/>
+ <hbox>
+ <hbox bgcolor="black">
+ <hspace width="18cm"/>
+ <vbox><vspace height="2px"/></vbox>
+ </hbox>
+ <hspace width="5mm"/>
+ </hbox>
+ <vspace height="3mm"/>
+ </vbox>'>
+
+<!ENTITY footline
+ '<vbox>
+ <hbox>
+ <hbox bgcolor="black">
+ <hspace width="18cm"/>
+ <vbox><vspace height="2px"/></vbox>
+ </hbox>
+ <hspace width="5mm"/>
+ </hbox>
+ <vspace height="2mm"/>
+ <hbox>
+ <button bgcolor="&bg.button;" label="Previous" action="list-prev"/>
+ <button bgcolor="&bg.button;" label="Next" action="list-next"/>
+ <hspace width="0pt" fill="yes"/>
+ <button bgcolor="&bg.button;" label="Home" goto="start-page"/>
+ <hspace width="5mm"/>
+ </hbox>
+ </vbox>'>
+
+<!ENTITY help.footline
+ '<vbox>
+ <hbox>
+ <hbox bgcolor="black">
+ <hspace width="18cm"/>
+ <vbox><vspace height="2px"/></vbox>
+ </hbox>
+ <hspace width="5mm"/>
+ </hbox>
+ <vspace height="2mm"/>
+ <hbox>
+ <button bgcolor="&bg.button;" label="Back" action="hist-prev"/>
+ <hspace width="0pt" fill="yes"/>
+ </hbox>
+ </vbox>'>
+
+<!ENTITY info.footline '&help.footline;'>
+
+]>
+
+<!-- ***************************************************************** -->
+<!-- ************************ ************************** -->
+<!-- ************************ Starting page ************************** -->
+<!-- ************************ ************************** -->
+<!-- ***************************************************************** -->
+
+<application start="start-page"
+ font="&dfl.font;"
+>
+
+ <mask name="start-page">
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ <vbox font="&h1.font;">
+ <label>A sample xmlforms application:</label>
+ <label>Address editor</label>
+ </vbox>
+ <vspace height="1cm"/>
+ <vbox>
+ <hbox>
+ <hbox width="6cm" halign="right">
+ <label>Name:</label>
+ </hbox>
+ <entry font="&in.font;" textwidth="40" slot="person.name"/>
+ </hbox>
+ <hbox>
+ <hbox width="6cm" halign="right">
+ <label>Postal address:</label>
+ </hbox>
+ <textbox font="&in.font;"
+ textwidth="40"
+ textheight="5"
+ slot="person.address"/>
+ </hbox>
+ <hbox>
+ <hbox width="6cm" halign="right">
+ <label>Email:</label>
+ </hbox>
+ <entry font="&in.font;" textwidth="40" slot="person.email"/>
+ </hbox>
+ <hbox>
+ <hbox width="6cm" halign="right">
+ <label>Telephone number:</label>
+ </hbox>
+ <entry font="&in.font;" textwidth="20" slot="person.phone-number"/>
+ </hbox>
+ </vbox>
+ <vspace height="1cm"/>
+ <hbox>
+ <hspace width="3cm"/>
+ <hbox width="8cm">
+ <vbox>
+ <button bgcolor="&bg.button;"
+ label="More about this person..."
+ goto="person-list"/>
+ <button bgcolor="&bg.button;"
+ label="Save"
+ action="save"/>
+ </vbox>
+ </hbox>
+ <hbox>
+ <vbox>
+ <button bgcolor="&bg.button;"
+ label="Info..."
+ goto="info"/>
+ <button bgcolor="&bg.button;"
+ label="Exit (without saving)"
+ action="exit"/>
+ </vbox>
+ </hbox>
+ </hbox>
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="0px" fill="yes"/>
+ </hbox>
+ </vbox>
+ </hbox>
+ </mask>
+
+ <!-- ***************************************************************** -->
+ <!-- ********************** **************************** -->
+ <!-- ********************** More about... **************************** -->
+ <!-- ********************** **************************** -->
+ <!-- ***************************************************************** -->
+
+ <sequence name="person-list">
+ <mask name="Department">
+ <!-- ************************** HEADER ************************** -->
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ &headline;
+ <!-- ************************** CONTENT ************************* -->
+ <label font="&h1.font;">Department</label>
+ <vspace height="3mm"/>
+ <label>The person is working in this department:</label>
+ <hbox>
+ <hspace width="1cm"/>
+ <entry font="&in.font;"
+ textwidth="70"
+ slot="person.department"/>
+ </hbox>
+ <vspace height="3mm"/>
+ <label>The project he/she is working for:</label>
+ <hbox>
+ <hspace width="1cm"/>
+ <textbox font="&in.font;"
+ textwidth="70"
+ textheight="5"
+ slot="person.project"/>
+ </hbox>
+ <vspace height="3mm"/>
+ <button bgcolor="&bg.button;"
+ label="Help"
+ goto="help.department"/>
+ <!-- ************************************************************ -->
+ </vbox>
+ </hbox>
+ <!-- ************************** FOOTER ************************** -->
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="5mm"/>
+ &footline;
+ </hbox>
+ </mask>
+
+
+ <mask name="business-contacts">
+ <!-- ************************** HEADER ************************** -->
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ &headline;
+ <!-- ************************** CONTENT ************************* -->
+ <label font="&h1.font;">Business Contacts</label>
+ <vspace height="3mm"/>
+ <label>Notes about contacts:</label>
+ <hbox>
+ <hspace width="1cm"/>
+ <textbox font="&in.font;"
+ textwidth="70"
+ textheight="10"
+ slot="person.contacts"/>
+ </hbox>
+ <vspace height="3mm"/>
+ <button bgcolor="&bg.button;"
+ label="Help"
+ goto="help.business-contacts"/>
+ <!-- ************************************************************ -->
+ </vbox>
+ </hbox>
+ <!-- ************************** FOOTER ************************** -->
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="5mm"/>
+ &footline;
+ </hbox>
+ </mask>
+
+ </sequence>
+
+ <!-- ***************************************************************** -->
+ <!-- ***************************** ***************************** -->
+ <!-- ***************************** Help ***************************** -->
+ <!-- ***************************** ***************************** -->
+ <!-- ***************************************************************** -->
+
+ <mask name="help.department">
+ <!-- ************************** HEADER ************************** -->
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ &help.headline;
+ <!-- ************************** CONTENT ************************* -->
+ <label font="&h1.font;">Department</label>
+ <vspace height="3mm"/>
+ <textbox fgcolor="&fg.hilfe;"
+ bgcolor="&bg.hilfe;"
+ textheight="15"
+ textwidth="70"
+>The help system should be designed to help you filling out your form, but
+writing help texts is so stupid...
+</textbox>
+ <!-- ************************************************************ -->
+ </vbox>
+ </hbox>
+ <!-- ************************** FOOTER ************************** -->
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="5mm"/>
+ &help.footline;
+ </hbox>
+ </mask>
+
+ <mask name="help.business-contacts">
+ <!-- ************************** HEADER ************************** -->
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ &help.headline;
+ <!-- ************************** CONTENT ************************* -->
+ <label font="&h1.font;">Business Contacts</label>
+ <vspace height="3mm"/>
+ <textbox fgcolor="&fg.hilfe;"
+ bgcolor="&bg.hilfe;"
+ textheight="15"
+ textwidth="70"
+>It is often helpful to remember the last telephone and/or email contacts
+quickly.
+</textbox>
+ <!-- ************************************************************ -->
+ </vbox>
+ </hbox>
+ <!-- ************************** FOOTER ************************** -->
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="5mm"/>
+ &help.footline;
+ </hbox>
+ </mask>
+
+ <!-- ***************************************************************** -->
+ <!-- ***************************************************************** -->
+ <!-- ****************************** Info ***************************** -->
+ <!-- ***************************************************************** -->
+ <!-- ***************************************************************** -->
+
+ <mask name="info">
+ <!-- ************************** HEADER ************************** -->
+ <vspace height="5mm"/>
+ <hbox>
+ <hspace width="5mm"/>
+ <vbox>
+ &info.headline;
+ <!-- ************************** CONTENT ************************* -->
+ <vspace height="3mm"/>
+ <textbox fgcolor="&fg.hilfe;"
+ bgcolor="&bg.hilfe;"
+ textheight="15"
+ textwidth="70"
+><![CDATA[About "xmlforms":
+Version <unknown>,
+written by Gerd Stolpmann
+
+Contact: Gerd.Stolpmann@darmstadt.netsurf.de
+]]></textbox>
+ <!-- ************************************************************ -->
+ </vbox>
+ </hbox>
+ <!-- ************************** FOOTER ************************** -->
+ <vspace height="0px" fill="yes"/>
+ <hbox>
+ <hspace width="5mm"/>
+ &info.footline;
+ </hbox>
+ </mask>
+
+
+</application>
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE application SYSTEM "./ds-style.dtd" [
+ <!ENTITY vz '<button label="&lt;" action="list-prev"/>
+ <button label=">" action="list-next"/>
+ <button label="exit" goto="first"/>'>
+]
+>
+
+<application start="first">
+<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
+<vbox halign="right">
+<label>one</label>
+<label bgcolor="green">Number two</label>
+<hbox width="4cm" halign="center" valign="bottom" bgcolor="red" fgcolor="blue">
+<vbox>
+<label>a1</label>
+<vspace height="1cm"/>
+<label>a2</label>
+</vbox>
+<label>b
+c</label>
+</hbox>
+</vbox>
+<textbox slot="q" textheight="5" textwidth="60">A Text</textbox>
+<button label="sequence" goto="seq"/>
+<label bgcolor="blue">A very long label, bigger than the box</label>
+<vspace height="2cm" fill="yes"/>
+<hbox><button label="left" bgcolor="yellow" goto="second"/><hspace width="0px" fill="yes"/>
+<entry slot="a" textwidth="10" fgcolor="red">right</entry>
+</hbox>
+</mask>
+
+<mask name="second">
+<button label="main" bgcolor="yellow" goto="first"/>
+<button label="previous" action="hist-prev"/>
+<button label="save" action="save"/>
+</mask>
+
+<sequence name="seq">
+<mask name="n1">
+<label>n1</label>
+&vz;
+</mask>
+<mask name="n2">
+<label>n2</label>
+&vz;
+</mask>
+<mask name="n3">
+<label>n3</label>
+&vz;
+</mask>
+<mask name="n4">
+<label>n4</label>
+&vz;
+</mask>
+<mask name="n5">
+<label>n5</label>
+&vz;
+</mask>
+</sequence>
+
+</application>
--- /dev/null
+<?xml encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!ELEMENT record (string)*>
+
+<!ELEMENT string (#PCDATA)>
+<!ATTLIST string
+ name ID #REQUIRED>
--- /dev/null
+<?xml encoding="ISO-8859-1"?>
+<!-- $Id$ -->
+
+<!-- entities describing content models -->
+
+<!ENTITY % vertical.only "vspace">
+<!ENTITY % horizontal.only "hspace">
+<!ENTITY % mixed "vbox|hbox|label|entry|textbox|button">
+
+
+<!-- entities describing attribute type -->
+
+<!ENTITY % att.valign "(top|bottom|center)">
+<!ENTITY % att.halign "(left|right|center)">
+
+
+<!ENTITY % default.atts "bgcolor CDATA #IMPLIED
+ fgcolor CDATA #IMPLIED
+ font CDATA #IMPLIED">
+
+<!-- "bgcolor", "fgcolor", and "font" are attributes applicable to every
+     element. They set the background color, the foreground color, and the
+     font, respectively, of the element and of all sub-elements that do not
+     specify another value.
+ Colors: all X windows names are allowed, e.g. "black", "white",
+ "lavenderblush", or "#A0B1C2".
+ Font: again X windows font names
+ -->
+
+
+<!ELEMENT application (mask|sequence)+>
+<!ATTLIST application
+ start IDREF #REQUIRED
+ %default.atts;
+>
+
+<!-- An "application" is the top-level element. The "start" attribute must
+ contain the name of the mask or mask sequence to start with.
+ -->
+
+
+<!ELEMENT sequence (mask)+>
+<!ATTLIST sequence
+ name ID #REQUIRED
+ %default.atts;
+>
+
+<!-- A "sequence" of masks. In a sequence, you can use the special button
+ actions "list-prev" and "list-next" that go to the previous mask resp.
+ the next mask of the sequence.
+ -->
+
+
+<!ELEMENT mask (%vertical.only;|%horizontal.only;|%mixed;)*>
+<!ATTLIST mask
+ name ID #REQUIRED
+ %default.atts;
+>
+
+<!-- A "mask" contains layout and functional elements of a visible page. -->
+
+
+<!ELEMENT vbox (%vertical.only;|%mixed;)*>
+<!ATTLIST vbox
+ halign %att.halign; "left"
+ %default.atts;
+>
+
+<!-- A "vbox" (vertical box) renders the inner material in vertical direction.
+ The "halign" attribute specifies whether the inner material should be
+ left-aligned, right-aligned, or centered.
+ -->
+
+<!ELEMENT hbox (%horizontal.only;|%mixed;)*>
+<!ATTLIST hbox
+ width CDATA #IMPLIED
+ halign %att.halign; "left"
+ valign %att.valign; "top"
+ %default.atts;
+>
+
+<!-- An "hbox" (horizontal box) renders the inner material in horizontal
+ direction. The "valign" attribute specifies whether the inner material
+ should be top-aligned, bottom-aligned, or centered.
+ Normally, the width of an hbox is the sum of its members, but you can
+ also widen a box by specifying the "width" attribute. This is a number
+ with a dimension, e.g. "10.5 cm", "105 mm", "4.13 in". Other dimensions
+ are "pt" (points) and "px" (pixels).
+ If "width" is given, you may also set "halign" (see vbox for possible
+ values).
+ -->
+
+<!ELEMENT vspace EMPTY>
+<!ATTLIST vspace
+ height CDATA #REQUIRED
+ fill (yes|no) "no"
+ %default.atts;
+>
+
+<!-- "vspace" is a vertical space of given "height" (again a number with a
+ dimension, see hbox).
+ If "fill" is "yes", the space is extended as much as possible.
+ -->
+
+<!ELEMENT hspace EMPTY>
+<!ATTLIST hspace
+ width CDATA #REQUIRED
+ fill (yes|no) "no"
+ %default.atts;
+>
+
+<!-- "hspace" is a horizontal space of given "width" (again a number with a
+ dimension, see hbox).
+ If "fill" is "yes", the space is extended as much as possible.
+ -->
+
+<!ELEMENT label (#PCDATA)>
+<!ATTLIST label
+ textwidth CDATA #IMPLIED
+ halign %att.halign; "left"
+ %default.atts;
+>
+
+<!-- A "label" is a piece of constant text. The text is included as #PCDATA
+ in the element.
+ You may set "textwidth" to a (dimensionless) number to specify a fixed
+ width. In this case, "halign" determines the horizontal alignment.
+ -->
+
+<!ELEMENT entry (#PCDATA)>
+<!ATTLIST entry
+ textwidth CDATA #REQUIRED
+ slot NMTOKEN #REQUIRED
+ %default.atts;
+>
+
+<!-- An "entry" is an editable text line. "textwidth" specifies the width of
+ the visible line (but the contents can be longer). "slot" is the name of
+ a slot that is associated with the element.
+ If the element contains #PCDATA, this is used as default value if
+ the slot has not yet been filled.
+ -->
+
+<!ELEMENT textbox (#PCDATA)>
+<!ATTLIST textbox
+ textwidth CDATA #REQUIRED
+ textheight CDATA #REQUIRED
+ slot NMTOKEN #IMPLIED
+ %default.atts;
+>
+
+<!-- A "textbox" is a text box with dimensions "textwidth" and "textheight"
+     (both dimensionless numbers).
+ "slot" is the name of a slot that is associated with the element.
+ If the element contains #PCDATA, this is used as default value if
+ the slot has not yet been filled.
+ If you omit "slot", the #PCDATA is displayed read-only.
+ -->
+
+<!ELEMENT button EMPTY>
+<!ATTLIST button
+ label CDATA #REQUIRED
+ action (goto|save|exit|save-exit|list-prev|list-next|
+ hist-prev|hist-next) "goto"
+ goto IDREF #IMPLIED
+ %default.atts;
+>
+
+<!-- A "button" is specified as follows:
+ - "label" is what is written on the button
+     - "action" specifies what to do if the button is pressed:
+ - "goto": jump to another mask or mask sequence whose name is given
+ in the attribute "goto"
+ - "save": save the record
+ - "exit": exit the application
+ - "save-exit": save, then exit
+ - "list-prev": jump to the previous mask in the sequence
+ - "list-next": jump to the next mask in the sequence
+ - "hist-prev": jump to the mask that has actually been the predecessor
+ - "hist-next": jump to the mask that has actually been the successor
+ -->
+
+
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE application SYSTEM "./ds-style.dtd" [
+]>
+
+<application start="first">
+<mask name="first" font="-*-lucidatypewriter-medium-r-*-*-12-*-*-*-*-*-*-*">
+<label>This is a label</label>
+</mask>
+</application>
--- /dev/null
+all_iso88591: generate_iso88591
+ $(MAKE) -f Makefile.code all_iso88591
+
+opt_iso88591: generate_iso88591
+ $(MAKE) -f Makefile.code opt_iso88591
+
+all_utf8: generate_utf8
+ $(MAKE) -f Makefile.code all_utf8
+
+opt_utf8: generate_utf8
+ $(MAKE) -f Makefile.code opt_utf8
+
+
+
+generate_iso88591:
+ $(MAKE) -f Makefile.generate all_iso88591
+ rm -f objects_iso88591 objects_utf8
+ $(MAKE) -f Makefile.generate objects_iso88591
+ touch objects_utf8
+ $(MAKE) -f Makefile.generate depend
+
+generate_utf8:
+ $(MAKE) -f Makefile.generate all_utf8
+ rm -f objects_iso88591 objects_utf8
+ $(MAKE) -f Makefile.generate objects_utf8
+ touch objects_iso88591
+ $(MAKE) -f Makefile.generate depend
+
+
+
+# Remove all generated files.
+# Makefile.code unconditionally includes "depend", "objects_iso88591" and
+# "objects_utf8", so these files must exist before it is invoked. They are
+# created (empty, if missing) here and are removed again by the clean
+# target of Makefile.generate.
+clean:
+	touch depend objects_iso88591 objects_utf8
+	$(MAKE) -f Makefile.code clean
+	$(MAKE) -f Makefile.generate clean
--- /dev/null
+
+LARCHIVE_iso88591 = pxp_lex_iso88591.cma
+LARCHIVE_utf8 = pxp_lex_utf8.cma
+XLARCHIVE_iso88591 = $(LARCHIVE_iso88591:.cma=.cmxa)
+XLARCHIVE_utf8 = $(LARCHIVE_utf8:.cma=.cmxa)
+
+# LOBJECTS_* and XLOBJECTS_* are included from "objects_*":
+include objects_iso88591
+include objects_utf8
+
+#----------------------------------------------------------------------
+
+all_iso88591: $(LARCHIVE_iso88591)
+opt_iso88591: $(XLARCHIVE_iso88591)
+all_utf8: $(LARCHIVE_utf8)
+opt_utf8: $(XLARCHIVE_utf8)
+
+$(LARCHIVE_iso88591): $(LOBJECTS_iso88591)
+ $(OCAMLC) -a -o $(LARCHIVE_iso88591) $(LOBJECTS_iso88591)
+
+$(XLARCHIVE_iso88591): $(XLOBJECTS_iso88591)
+ $(OCAMLOPT) -a -o $(XLARCHIVE_iso88591) $(XLOBJECTS_iso88591)
+
+$(LARCHIVE_utf8): $(LOBJECTS_utf8)
+ $(OCAMLC) -a -o $(LARCHIVE_utf8) $(LOBJECTS_utf8)
+
+$(XLARCHIVE_utf8): $(XLOBJECTS_utf8)
+ $(OCAMLOPT) -a -o $(XLARCHIVE_utf8) $(XLOBJECTS_utf8)
+
+#----------------------------------------------------------------------
+# general rules:
+
+# Extra flags appended to both compiler command lines; empty by default.
+OPTIONS =
+# Bytecode compiler, run through ocamlfind with debugging information (-g).
+OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS)
+# Native-code compiler, run through ocamlfind.
+# NOTE(review): -p asks ocamlopt for profiling code - confirm that this is
+# intended for regular builds.
+OCAMLOPT = ocamlfind ocamlopt -p -I .. -package netstring $(OPTIONS)
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli
+
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+.ml.cmo:
+ $(OCAMLC) -c $<
+
+.mli.cmi:
+ $(OCAMLC) -c $<
+
+
+*.mli:
+
+clean:
+ rm -f *.cmo *.cmx *.cma *.cmxa *.cmi *.o *.a
+
+include depend
--- /dev/null
+LEXERSRC = pxp_lex_misc.src \
+ pxp_lex_document.src \
+ pxp_lex_content.src \
+ pxp_lex_within_tag.src \
+ pxp_lex_document_type.src \
+ pxp_lex_declaration.src \
+ pxp_lex_dtd_string.src \
+ pxp_lex_content_string.src \
+ pxp_lex_name_string.src
+
+OTHERSRC = open_pxp_lex_aux_iso88591.src \
+ pxp_lex_aux.src \
+ pxp_lex_defs_iso88591.def
+
+LEXERMLL_iso88591 = $(LEXERSRC:.src=_iso88591.mll)
+LEXERMLL_utf8 = $(LEXERSRC:.src=_utf8.mll)
+
+LEXERML_iso88591 = $(LEXERSRC:.src=_iso88591.ml)
+LEXERML_utf8 = $(LEXERSRC:.src=_utf8.ml)
+
+LEXERCMO_iso88591 = pxp_lex_aux_iso88591.cmo $(LEXERSRC:.src=_iso88591.cmo)
+LEXERCMO_utf8 = pxp_lex_aux_utf8.cmo $(LEXERSRC:.src=_utf8.cmo)
+
+LEXERCMX_iso88591 = $(LEXERCMO_iso88591:.cmo=.cmx)
+LEXERCMX_utf8 = $(LEXERCMO_utf8:.cmo=.cmx)
+
+.PHONY: all_iso88591
+all_iso88591: iso88591_done
+
+.PHONY: all_utf8
+all_utf8: utf8_done
+
+# Generate the ISO-8859-1 lexers: run insert_variant over the .src files for
+# the "iso88591" variant, then run ocamllex on each *_iso88591.mll file.
+# The stamp file iso88591_done is only touched when every step succeeded;
+# "|| exit 1" aborts the loop at the first ocamllex failure (a plain "for"
+# loop would only report the status of the last iteration).
+iso88591_done: $(LEXERSRC) $(OTHERSRC)
+	../tools/insert_variant -variant iso88591 $(LEXERSRC)
+	for file in $(LEXERMLL_iso88591); do ocamllex $$file || exit 1; done
+	touch iso88591_done
+
+# Generate the UTF-8 lexers: run insert_variant over the .src files for the
+# "utf8" variant, then run ocamllex on each *_utf8.mll file.
+# The stamp file utf8_done is only touched when every step succeeded;
+# "|| exit 1" aborts the loop at the first ocamllex failure (a plain "for"
+# loop would only report the status of the last iteration).
+utf8_done: $(LEXERSRC) $(OTHERSRC) pxp_lex_defs_utf8.def
+	../tools/insert_variant -variant utf8 $(LEXERSRC)
+	for file in $(LEXERMLL_utf8); do ocamllex $$file || exit 1; done
+	touch utf8_done
+
+# Build the UTF-8 definitions: the output of ucs2_to_utf8 applied to the
+# generic definitions, followed by the contents of pxp_lex_defs_drv_utf8.def.
+# If either step fails, the partial target is removed AND the recipe fails,
+# so that make neither continues nor considers a truncated file up to date.
+pxp_lex_defs_utf8.def: pxp_lex_defs_generic.def pxp_lex_defs_drv_utf8.def
+	../tools/ucs2_to_utf8/ucs2_to_utf8 <pxp_lex_defs_generic.def \
+		>pxp_lex_defs_utf8.def || \
+		{ rm -f pxp_lex_defs_utf8.def; exit 1; }
+	cat pxp_lex_defs_drv_utf8.def >>pxp_lex_defs_utf8.def || \
+		{ rm -f pxp_lex_defs_utf8.def; exit 1; }
+
+objects_iso88591:
+ echo LOBJECTS_iso88591 = $(LEXERCMO_iso88591) >objects_iso88591
+ echo XLOBJECTS_iso88591 = $(LEXERCMX_iso88591) >>objects_iso88591
+
+objects_utf8:
+ echo LOBJECTS_utf8 = $(LEXERCMO_utf8) >objects_utf8
+ echo XLOBJECTS_utf8 = $(LEXERCMX_utf8) >>objects_utf8
+
+depend: *.ml *.mli
+ ocamldep *.ml *.mli >depend
+
+.PHONY: clean
+clean:
+ rm -f $(LEXERMLL_iso88591) $(LEXERML_iso88591) iso88591_done \
+ $(LEXERMLL_utf8) $(LEXERML_utf8) utf8_done \
+ pxp_lex_defs_utf8.def \
+ objects_iso88591 objects_utf8 depend
+
+*.mli:
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_iso88591
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_aux_utf8
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_iso88591
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_lex_misc_utf8
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+ (* A class without methods or fields. Its only instance is used below as
+  * the entity argument of the pre-allocated tokens.
+  * NOTE(review): presumably the consumer of these tokens substitutes the
+  * real entity later - confirm against the callers.
+  *)
+ class dummy_entity = object end
+
+ (* The shared placeholder instance, constrained to type entity_id. *)
+ let dummy_entity = ( new dummy_entity : entity_id )
+
+ (* The following tokens are pre-allocated to reduce the load on the
+  * GC.
+  *
+  * Every value is a pair. Naming scheme: tok_<first>__<second>, where
+  * <first> names the token constructor and <second> the second component
+  * of the pair (NOTE(review): presumably the lexer context to continue
+  * with - confirm). Constructors that take an entity argument receive the
+  * placeholder dummy_entity.
+  *)
+
+ let tok_Doctype__Document_type = Doctype dummy_entity, Document_type
+ let tok_Ignore__Document = Ignore, Document
+ let tok_Ignore__Within_tag = Ignore, Within_tag
+ let tok_Ignore__Document_type = Ignore, Document_type
+ let tok_Ignore__Declaration = Ignore, Declaration
+ let tok_Ignore__Ignored = Ignore, Ignored_section
+ let tok_Eof__Document = Eof, Document
+ let tok_Eof__Content = Eof, Content
+ let tok_Eof__Within_tag = Eof, Within_tag
+ let tok_Eof__Document_type = Eof, Document_type
+ let tok_Eof__Declaration = Eof, Declaration
+ let tok_Eof__Ignored = Eof, Ignored_section
+ (* The three line-end tokens keep the original line terminator as text. *)
+ let tok_LineEndCRLF__Content = LineEnd "\r\n", Content
+ let tok_LineEndCR__Content = LineEnd "\r", Content
+ let tok_LineEndLF__Content = LineEnd "\n", Content
+ let tok_CharDataRBRACKET__Content = CharData "]", Content
+ let tok_Eq__Within_tag = Eq, Within_tag
+ let tok_Rangle__Content = Rangle, Content
+ let tok_Rangle_empty__Content = Rangle_empty, Content
+ let tok_Dtd_begin__Declaration = Dtd_begin dummy_entity, Declaration
+ let tok_Doctype_rangle__Document = Doctype_rangle dummy_entity, Document
+ let tok_Percent__Declaration = Percent, Declaration
+ let tok_Plus__Declaration = Plus, Declaration
+ let tok_Star__Declaration = Star, Declaration
+ let tok_Bar__Declaration = Bar, Declaration
+ let tok_Comma__Declaration = Comma, Declaration
+ let tok_Qmark__Declaration = Qmark, Declaration
+ let tok_Lparen__Declaration = Lparen dummy_entity, Declaration
+ let tok_RparenPlus__Declaration = RparenPlus dummy_entity, Declaration
+ let tok_RparenStar__Declaration = RparenStar dummy_entity, Declaration
+ let tok_RparenQmark__Declaration = RparenQmark dummy_entity, Declaration
+ let tok_Rparen__Declaration = Rparen dummy_entity, Declaration
+ let tok_Required__Declaration = Required, Declaration
+ let tok_Implied__Declaration = Implied, Declaration
+ let tok_Fixed__Declaration = Fixed, Declaration
+ let tok_Pcdata__Declaration = Pcdata, Declaration
+ let tok_Decl_element__Declaration = Decl_element dummy_entity, Declaration
+ let tok_Decl_attlist__Declaration = Decl_attlist dummy_entity, Declaration
+ let tok_Decl_entity__Declaration = Decl_entity dummy_entity, Declaration
+ let tok_Decl_notation__Declaration = Decl_notation dummy_entity, Declaration
+ let tok_Conditional_begin__Declaration = Conditional_begin dummy_entity,
+                                           Declaration
+ let tok_Conditional_begin__Ignored = Conditional_begin dummy_entity,
+                                       Ignored_section
+ let tok_Conditional_end__Declaration = Conditional_end dummy_entity,
+                                         Declaration
+ let tok_Conditional_end__Ignored = Conditional_end dummy_entity,
+                                     Ignored_section
+ let tok_Conditional_body__Declaration = Conditional_body dummy_entity,
+                                          Declaration
+ let tok_Decl_rangle__Declaration = Decl_rangle dummy_entity, Declaration
+ let tok_Dtd_end__Document_type = Dtd_end dummy_entity, Document_type
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_utf8 *)
+
+ open Pxp_types
+ open Pxp_lexer_types
+
+ (* [get_name_end s k] scans [s] starting at index [k] and returns the index
+  * of the first tab, line feed, carriage return or space it finds, i.e. the
+  * position just behind the name beginning at [k]. If no such character
+  * follows, the length of [s] is returned.
+  *)
+ let get_name_end s k =
+   let n = String.length s in
+   let is_ws c = (c = '\t' || c = '\n' || c = '\r' || c = ' ') in
+   let pos = ref k in
+   while !pos < n && not (is_ws s.[!pos]) do
+     incr pos
+   done;
+   (* A start index beyond the end yields the length, not the index itself *)
+   if !pos < n then !pos else n
+
+ (* [get_ws_end s k] skips the run of spaces, tabs, carriage returns and
+  * line feeds that starts at index [k] of [s] and returns the index of the
+  * first character behind it, or the length of [s] if the run extends to
+  * the end of the string.
+  *)
+ let get_ws_end s k =
+   let n = String.length s in
+   let rec skip i =
+     if i >= n then n
+     else if String.contains " \t\r\n" s.[i] then skip (i + 1)
+     else i
+   in
+   skip k
+
+ (* [scan_pi pi xml_scanner] turns the complete text [pi] of a processing
+  * instruction (including the leading "<?" and the trailing "?>") into a
+  * token. [xml_scanner] is the lexer function applied to the inner text.
+  *
+  * Result: [PI_xml toks] if the target name is "xml", where [toks] are all
+  * tokens delivered by [xml_scanner] up to (excluding) Pro_eof; otherwise
+  * [PI(name, param)], where [param] is the text following the name and its
+  * trailing whitespace (possibly "").
+  * Raises WF_error if the text does not begin with a name, or if the name
+  * is not followed by whitespace.
+  *)
+ let scan_pi pi xml_scanner =
+   (* The PI without the leading "<?" and the trailing "?>" *)
+   let body = String.sub pi 2 (String.length pi - 4) in
+   (* The lexer expects whitespace after every clause; appending a space
+    * guarantees that there is whitespace at the end of the string.
+    *)
+   let lexbuf = Lexing.from_string (body ^ " ") in
+   let bad_pi () = raise (WF_error ("Bad processing instruction")) in
+
+   (* The first word must be a name. [skip] is the length of the name plus
+    * the whitespace following it (both are part of the lexeme).
+    *)
+   let target, skip =
+     match xml_scanner lexbuf with
+     | Pro_name name ->
+         let consumed = String.length (Lexing.lexeme lexbuf) in
+         if String.length name = consumed then
+           (* No whitespace after the name *)
+           bad_pi ()
+         else
+           (name, consumed)
+     | _ -> bad_pi ()
+   in
+
+   if target = "xml" then begin
+     (* It is a <?xml ...?> PI: collect the remaining tokens *)
+     let rec gather () =
+       match xml_scanner lexbuf with
+       | Pro_eof -> []
+       | tok -> tok :: gather ()
+     in
+     PI_xml (gather ())
+   end
+   else begin
+     (* rest_len is -1 when the name is not followed by anything, because
+      * the lexeme then includes the space appended above.
+      *)
+     let rest_len = String.length body - skip in
+     let param =
+       if rest_len >= 1 then String.sub body skip rest_len else "" in
+     PI (target, param)
+   end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* NOTE: Currently, this module is *identical* to Pxp_lex_aux_iso88591 *)
+
+ open Pxp_types
+ open Pxp_lexer_types
+
+ (* [get_name_end s k] scans [s] starting at index [k] and returns the index
+  * of the first tab, line feed, carriage return or space it finds, i.e. the
+  * position just behind the name beginning at [k]. If no such character
+  * follows, the length of [s] is returned.
+  *)
+ let get_name_end s k =
+   let n = String.length s in
+   let is_ws c = (c = '\t' || c = '\n' || c = '\r' || c = ' ') in
+   let pos = ref k in
+   while !pos < n && not (is_ws s.[!pos]) do
+     incr pos
+   done;
+   (* A start index beyond the end yields the length, not the index itself *)
+   if !pos < n then !pos else n
+
+ (* [get_ws_end s k] skips the run of spaces, tabs, carriage returns and
+  * line feeds that starts at index [k] of [s] and returns the index of the
+  * first character behind it, or the length of [s] if the run extends to
+  * the end of the string.
+  *)
+ let get_ws_end s k =
+   let n = String.length s in
+   let rec skip i =
+     if i >= n then n
+     else if String.contains " \t\r\n" s.[i] then skip (i + 1)
+     else i
+   in
+   skip k
+
+ (* [scan_pi pi xml_scanner] turns the complete text [pi] of a processing
+  * instruction (including the leading "<?" and the trailing "?>") into a
+  * token. [xml_scanner] is the lexer function applied to the inner text.
+  *
+  * Result: [PI_xml toks] if the target name is "xml", where [toks] are all
+  * tokens delivered by [xml_scanner] up to (excluding) Pro_eof; otherwise
+  * [PI(name, param)], where [param] is the text following the name and its
+  * trailing whitespace (possibly "").
+  * Raises WF_error if the text does not begin with a name, or if the name
+  * is not followed by whitespace.
+  *)
+ let scan_pi pi xml_scanner =
+   (* The PI without the leading "<?" and the trailing "?>" *)
+   let body = String.sub pi 2 (String.length pi - 4) in
+   (* The lexer expects whitespace after every clause; appending a space
+    * guarantees that there is whitespace at the end of the string.
+    *)
+   let lexbuf = Lexing.from_string (body ^ " ") in
+   let bad_pi () = raise (WF_error ("Bad processing instruction")) in
+
+   (* The first word must be a name. [skip] is the length of the name plus
+    * the whitespace following it (both are part of the lexeme).
+    *)
+   let target, skip =
+     match xml_scanner lexbuf with
+     | Pro_name name ->
+         let consumed = String.length (Lexing.lexeme lexbuf) in
+         if String.length name = consumed then
+           (* No whitespace after the name *)
+           bad_pi ()
+         else
+           (name, consumed)
+     | _ -> bad_pi ()
+   in
+
+   if target = "xml" then begin
+     (* It is a <?xml ...?> PI: collect the remaining tokens *)
+     let rec gather () =
+       match xml_scanner lexbuf with
+       | Pro_eof -> []
+       | tok -> tok :: gather ()
+     in
+     PI_xml (gather ())
+   end
+   else begin
+     (* rest_len is -1 when the name is not followed by anything, because
+      * the lexeme then includes the space appended above.
+      *)
+     let rest_len = String.length body - skip in
+     let param =
+       if rest_len >= 1 then String.sub body skip rest_len else "" in
+     PI (target, param)
+   end
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_content: the lexer for element content.
+ * Every action yields a pair whose first component is the token. The second
+ * component is Content, except after "<!--" (Content_comment) and after the
+ * name of a start or end tag (Within_tag).
+ * Raises WF_error for markup that is malformed or not allowed here, and
+ * Netconversion.Malformed_code for input that no other rule matches.
+ * NOTE(review): int_of_string raises Failure when a character reference
+ * does not fit into an int - confirm that the callers cope with that.
+ *)
+rule scan_content = parse
+    "<?" pi_string "?>"
+      { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Content }
+  | "<?"
+      { raise (WF_error ("Illegal processing instruction")) }
+  | "<!--"
+      { Comment_begin, Content_comment }
+  | '<' '/'? name
+      (* One rule for Tag_beg and Tag_end saves transitions. *)
+      { let s = Lexing.lexeme lexbuf in
+        if s.[1] = '/' then
+          Tag_end (String.sub s 2 (String.length s - 2), dummy_entity),
+          Within_tag
+        else
+          Tag_beg (String.sub s 1 (String.length s - 1), dummy_entity),
+          Within_tag
+      }
+  | "<![CDATA[" cdata_string "]]>"
+      (* Strip the 9 characters of "<![CDATA[" and the 3 of "]]>" *)
+      { let s = Lexing.lexeme lexbuf in
+        Cdata (String.sub s 9 (String.length s - 12)), Content }
+  | "<!"
+      { raise (WF_error "Declaration either malformed or not allowed in this context")
+      }
+  | "<"
+      { raise (WF_error ("The left angle bracket '<' must be written as '&lt;'"))
+      }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        CRef (int_of_string (String.sub s 2 (String.length s - 3))), Content }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))), Content }
+  | "&" name ";"
+      { let s = Lexing.lexeme lexbuf in
+        ERef (String.sub s 1 (String.length s - 2)), Content }
+  | "&"
+      { raise (WF_error ("The ampersand '&' must be written as '&amp;'"))
+      }
+
+  (* LineEnd: Depending on whether we are reading from a primary source
+   * (file) or from the replacement text of an internal entity, line endings
+   * must be normalized (converted to \n) or not.
+   * The entity classes do that. The yacc parser will never see LineEnd;
+   * this token is always converted to the appropriate CharData token.
+   *)
+
+  | '\013' '\010'
+      { tok_LineEndCRLF__Content }
+  | '\013'
+      { tok_LineEndCR__Content }
+  | '\010'
+      { tok_LineEndLF__Content }
+  | eof
+      { tok_Eof__Content }
+  | "]]>"
+      { raise (WF_error ("The sequence ']]>' must be written as ']]&gt;'"))
+      }
+  | "]"
+      { tok_CharDataRBRACKET__Content }
+  | normal_character+
+      { let s = Lexing.lexeme lexbuf in
+        CharData s, Content
+      }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_content_string: tokenizes the text of an attribute value so that it
+ * can be expanded and normalized.
+ *   - a general entity reference yields ERef with the entity name
+ *   - a decimal or hexadecimal character reference yields CRef with the code
+ *   - TAB, a lone CR and a lone LF each yield CRef 32 (a space)
+ *   - the pair CR LF yields CRef(-1), a marker value (how it is consumed
+ *     is not visible in this file)
+ *   - any other run of printable characters, and a single left angle
+ *     bracket, yield CharData
+ * Raises WF_error for an ampersand that does not start a reference, and
+ * Netconversion.Malformed_code for input that no other rule accepts.
+ *)
+
+rule scan_content_string = parse
+    '&' name ';'
+      { let s = Lexing.lexeme lexbuf in
+        (* strip the leading ampersand and the trailing semicolon *)
+        ERef (String.sub s 1 (String.length s - 2)) }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        (* the 0x prefix makes int_of_string read the digits as hexadecimal *)
+        CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
+  | '&'
+      { (* The message names the escaped form the author has to use. *)
+        raise(WF_error("The character '&' must be written as '&amp;'")) }
+  | printable_character_except_amp_lt+
+      { CharData (Lexing.lexeme lexbuf) }
+  | '\009'
+      { CRef 32 }
+  | '\013' '\010'
+      { CRef(-1)        (* A special case *)
+      }
+  | '\013'
+      { CRef 32 }
+  | '\010'
+      { CRef 32 }
+  | '<'
+      {
+        (* Depending on the situation, '<' may be legal or not: *)
+        CharData "<"
+      }
+  | eof
+      { Eof }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_declaration: after "[" in DTD until matching "]" *)
+
+rule scan_declaration = parse
+ ws+ (* NOTE(review): the tok_A__B names are defined elsewhere; they look
+ * like prebuilt pairs of token A and follow-up lexer context B, matching
+ * the explicit pairs written out in some actions below -- confirm *)
+ { tok_Ignore__Declaration }
+ | '%' name ';' (* complete parameter entity reference; being the longer match it wins over the bare percent rule *)
+ { let s = Lexing.lexeme lexbuf in
+ (PERef (String.sub s 1 (String.length s - 2))), Declaration }
+ | '%'
+ { tok_Percent__Declaration }
+ | '&'
+ { raise(WF_error("References to general entities not allowed in DTDs")) }
+ | name (* listed before nmtoken: a lexeme matching both is reported as Name, because the earlier rule wins on equal length *)
+ { Name (Lexing.lexeme lexbuf), Declaration }
+ | nmtoken
+ { Nametoken (Lexing.lexeme lexbuf), Declaration }
+ | '+'
+ { tok_Plus__Declaration }
+ | '*'
+ { tok_Star__Declaration }
+ | '|'
+ { tok_Bar__Declaration }
+ | ','
+ { tok_Comma__Declaration }
+ | '?'
+ { tok_Qmark__Declaration }
+ | '('
+ { tok_Lparen__Declaration }
+ | ")+" (* a closing parenthesis directly followed by an occurrence indicator is lexed as one token *)
+ { tok_RparenPlus__Declaration }
+ | ")*"
+ { tok_RparenStar__Declaration }
+ | ")?"
+ { tok_RparenQmark__Declaration }
+ | ')'
+ { tok_Rparen__Declaration }
+ | "#REQUIRED"
+ { tok_Required__Declaration }
+ | "#IMPLIED"
+ { tok_Implied__Declaration }
+ | "#FIXED"
+ { tok_Fixed__Declaration }
+ | "#PCDATA"
+ { tok_Pcdata__Declaration }
+ | "<!ELEMENT"
+ { tok_Decl_element__Declaration }
+ | "<!ATTLIST"
+ { tok_Decl_attlist__Declaration }
+ | "<!ENTITY"
+ { tok_Decl_entity__Declaration }
+ | "<!NOTATION"
+ { tok_Decl_notation__Declaration }
+ | "<!--" (* the second component is Decl_comment here, not Declaration *)
+ { Comment_begin, Decl_comment }
+ | "<!["
+ { tok_Conditional_begin__Declaration }
+ | "]]>"
+ { tok_Conditional_end__Declaration }
+ | "["
+ { tok_Conditional_body__Declaration }
+
+ (* TODO: PIs modified *)
+
+ | "<?" pi_string "?>" (* the whole instruction is handed to scan_pi together with the scan_xml_pi lexer; both are defined elsewhere *)
+ { scan_pi (Lexing.lexeme lexbuf) scan_xml_pi, Declaration }
+ | "<?"
+ { raise (WF_error ("Illegal processing instruction")) }
+ | '"' [^ '"']* '"' (* the pattern accepts arbitrary bytes between the quotes, hence the explicit check in the action *)
+ { let s = Lexing.lexeme lexbuf in
+ (* Check that characters are well-formed: *)
+ ignore(scan_characters (Lexing.from_string s));
+ (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
+ | '"'
+ { raise (WF_error ("Cannot find the second quotation mark"))
+ }
+ | "'" [^ '\'']* "'" (* same as the rule for double quotes above *)
+ { let s = Lexing.lexeme lexbuf in
+ (* Check that characters are well-formed: *)
+ ignore(scan_characters (Lexing.from_string s));
+ (Unparsed_string (String.sub s 1 (String.length s - 2))), Declaration }
+ | "'"
+ { raise (WF_error ("Cannot find the second quotation mark"))
+ }
+ | '>'
+ { tok_Decl_rangle__Declaration }
+ | ']' (* the only rule here whose tok_ name ends in Document_type rather than Declaration *)
+ { tok_Dtd_end__Document_type }
+ | eof
+ { tok_Eof__Declaration }
+ | "<!" (* selected only when none of the longer patterns starting with these two characters matched *)
+ { raise (WF_error "Declaration either malformed or not allowed in this context")
+ }
+ | character (* any other well-formed character is an error at this level *)
+ { raise (WF_error("Illegal token or character")) }
+ | _
+ { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Whitespace as used between tokens: space, TAB, CR, LF. *)
+let ws = [ ' ' '\t' '\r' '\n' ]
+
+(* Digits of decimal character references. *)
+let ascii_digit = ['0'-'9']
+
+(* Digits of hexadecimal character references.  Only a-f and A-F are
+ * hexadecimal digits; the lexers pass the matched digits to int_of_string
+ * with a 0x prefix, which raises Failure on any other letter.
+ *)
+let ascii_hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
+
+(* letter, digit, combiningChar and extender are the Unicode classes
+ * defined in a separate definition file.
+ *)
+let namechar = letter | digit | '.' | ':' | '-' | '_' | combiningChar | extender
+
+(* A name starts with a letter, underscore or colon. *)
+let name = ( letter | '_' | ':' ) namechar*
+
+(* A name token may start with any name character. *)
+let nmtoken = namechar+
+
+(* Valid characters are:
+ * #9, #10, #13, #32-#xD7FF, #xE000-#xFFFD, #x10000-#x10FFFF
+ *
+ * #xD7FF as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...D 10...7.. 10.F...F = ED 9F BF
+ *
+ * #xE000 as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...E 10...0.. 10.0...0 = EE 80 80
+ *
+ * UTF-8 sequence EF BE BF as character:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...F 10111110 10111111 = #FFBF
+ *
+ * #xFFFD as UTF-8 sequence:
+ * 1110xxxx 10xxxxxx 10xxxxxx
+ * 1110...F 10...F.. 10.F...D = EF BF BD
+ *
+ * #x010000 as UTF-8 sequence:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 111100.. 10.1...0 10...0.. 10.0...0 = F0 90 80 80
+ *
+ * #x10FFFF as UTF-8 sequence:
+ * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ * 111101.. 10.0...F 10...F.. 10.F...F = F4 8F BF BF
+ *)
+
+
+let non_ascii_character = (* one UTF-8 encoded character outside ASCII, as
+ * a sequence of two to four bytes; the surrogate range #xD800-#xDFFF and
+ * the code points #xFFFE and #xFFFF are left out.
+ * NOTE(review): the two- and three-byte alternatives also admit overlong
+ * forms (lead bytes \192 and \193; \224 followed by \128-\159), unlike the
+ * four-byte alternative -- confirm whether that is acceptable. *)
+ ['\192'-'\223'] ['\128'-'\191'] (* #x80-#x7FF *)
+| ['\224'-'\236'] ['\128'-'\191'] ['\128'-'\191'] (* #x800-#xCFFF *)
+| '\237' ['\128'-'\159'] ['\128'-'\191'] (* #xD000-#xD7FF *)
+| '\238' ['\128'-'\191'] ['\128'-'\191'] (* #xE000-#xEFFF *)
+| '\239' ['\128'-'\190'] ['\128'-'\191'] (* #xF000-#xFFBF *)
+| '\239' '\191' ['\128'-'\189'] (* #xFFC0-#xFFFD *)
+| '\240' ['\144'-'\191'] ['\128'-'\191'] ['\128'-'\191']
+ (* #x010000-#x03FFFF *)
+| ['\241'-'\243'] ['\128'-'\191'] ['\128'-'\191'] ['\128'-'\191']
+ (* #x040000-#x0FFFFF *)
+| '\244' ['\128'-'\143'] ['\128'-'\191'] ['\128'-'\191']
+ (* #x100000-#x10FFFF *)
+
+let character = (* TAB, LF, CR, the ASCII range from space upwards, or one well-formed non-ASCII sequence *)
+ [ '\009' '\010' '\013' '\032'-'\127' ]
+| non_ascii_character
+
+
+let character_except_question_mark = (* '?' = '\063' *)
+ [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\127' ]
+| non_ascii_character
+
+
+let character_except_right_angle_bracket = (* '>' = '\062' *)
+ [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\127' ]
+| non_ascii_character
+
+
+let character_except_minus = (* '-' = '\045' *)
+ [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\127' ]
+| non_ascii_character
+
+
+let character_except_quot = (* '"' = '\034' *)
+ [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ] (* NOTE(review): the upper bound is \255 here, not \127 as in the classes above, so any single byte above \127 is accepted and the non_ascii_character alternative adds nothing -- confirm whether this is deliberate *)
+| non_ascii_character
+
+
+let character_except_apos = (* '\'' = '\039' *)
+ [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ] (* NOTE(review): same \255 upper bound as character_except_quot -- confirm *)
+| non_ascii_character
+
+
+(* pi_string: the text between the opening and the closing delimiter of a
+ * processing instruction, i.e. any sequence of characters that does not
+ * contain the closing delimiter (a question mark directly followed by a
+ * right angle bracket).
+ *
+ * A run of question marks inside the text must be followed by a character
+ * that is neither a question mark nor a right angle bracket.  Trailing
+ * question marks are allowed; the closing delimiter that follows pi_string
+ * in the lexer rules supplies the final question mark and the bracket.
+ *
+ * The previous definition accepted a second question mark as the character
+ * following a question mark, so two question marks followed by a right
+ * angle bracket were swallowed and the longest match ran on to a later
+ * closing delimiter.
+ *)
+let character_except_question_mark_rangle =   (* '?' = '\063', '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\064'-'\127' ]
+| non_ascii_character
+
+let pi_string = character_except_question_mark*
+                ( '?'+ character_except_question_mark_rangle
+                       character_except_question_mark* )*
+                '?'*
+
+
+let comment_string = character_except_minus* (* text without two adjacent minus signs that does not end in a minus sign *)
+ ('-' character_except_minus+ )*
+
+
+let normal_character =
+ (* Character except '&' = '\038', '<' = '\060', ']' = '\093', and CR LF *)
+ [ '\009' '\032'-'\037' '\039'-'\059' '\061'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let character_except_rbracket = (* ']' = '\093' *)
+ [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let character_except_rbracket_rangle = (* ']' = '\093', '>' = '\062' *)
+ [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+let cdata_string = (* text that never contains two right brackets directly followed by a right angle bracket: after two or more brackets the next character must be neither of the two *)
+ character_except_rbracket*
+ ( "]" character_except_rbracket+ |
+ "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
+ )*
+ ']'*
+
+
+let printable_character_except_amp_lt = (* TAB, CR and LF are not in this class either *)
+ (* '&' = '\038', '<' = '\060' *)
+ [ '\032'-'\037' '\039'-'\059' '\061'-'\127']
+| non_ascii_character
+
+
+let printable_character_except_amp_percent = (* TAB, CR and LF are not in this class either *)
+ (* '%' = '\037', '&' = '\038' *)
+ [ '\032'-'\036' '\039'-'\127']
+| non_ascii_character
+
+
+let character_except_special =
+ (* '<'=060, ']'=093, '"'=034, '\''=039 *)
+ [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059'
+ '\061'-'\092' '\094'-'\127' ]
+| non_ascii_character
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/26 19:58:08 gerd
+ * Bugfix in character_except_apos. The bug caused that attribute
+ * values delimited by ' could not be scanned at all.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(*****************************************************************)
+(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+(* 14/05/2000 *)
+(* *)
+(* These are taken from the appendix B of the XML recommendation *)
+(* *)
+(*****************************************************************)
+
+(* Production [85] BaseChar of the XML 1.0 recommendation.  The #x notation is not plain ocamllex syntax; NOTE(review): presumably expanded by a preprocessing step -- confirm. *)
+let baseChar =
+ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6]
+ | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148]
+ | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
+ | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386
+ | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE]
+ | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3]
+ | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
+ | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB]
+ | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559
+ | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
+ | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE]
+ | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
+ | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8]
+ | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD]
+ | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
+ | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36]
+ | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74]
+ | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
+ | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0
+ | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
+ | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D]
+ | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95]
+ | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
+ | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C]
+ | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39]
+ | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
+ | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1]
+ | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39]
+ | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
+ | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A
+ | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5
+ | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
+ | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69]
+ | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103]
+ | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C
+ | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159
+ | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E]
+ | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF]
+ | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9
+ | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
+ | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B
+ | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE
+ | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB]
+ | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
+ | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094]
+ | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]
+;;
+
+(* Production [86] Ideographic of the XML 1.0 recommendation *)
+let ideographic = [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] ;;
+
+(* Production [84] Letter of the XML 1.0 recommendation: a base character or an ideographic character *)
+let letter = baseChar | ideographic ;;
+
+(* Production [87] CombiningChar of the XML 1.0 recommendation *)
+let combiningChar =
+ [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1]
+ | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4
+ | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF]
+ | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903]
+ | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963]
+ | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4]
+ | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02
+ | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48]
+ | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC
+ | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
+ | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D]
+ | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8]
+ | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
+ | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83]
+ | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6]
+ | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
+ | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1
+ | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
+ | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84]
+ | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD]
+ | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
+ | #x3099 | #x309A
+;;
+
+(* Production [88] Digit of the XML 1.0 recommendation: decimal digits of several scripts, not only ASCII *)
+let digit =
+ [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F]
+ | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F]
+ | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
+ | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
+;;
+
+(* Production [89] Extender of the XML 1.0 recommendation *)
+let extender =
+ #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005
+ | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let ws = [ ' ' '\t' '\r' '\n' ]
+
+(* Note: ISO-8859-1 charset does not have 'combining characters' *)
+
+let letter = ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255'] (* the gaps \215 and \247 are the multiplication and division signs *)
+let extender = '\183' (* the middle dot, #x00B7 *)
+let digit = ['0'-'9']
+let ascii_digit = ['0'-'9'] (* digits of decimal character references *)
+let ascii_hexdigit = ['0'-'9' 'A'-'F' 'a'-'f'] (* digits of hexadecimal character references *)
+let namechar = letter | digit | '.' | ':' | '-' | '_' | extender
+let name = ( letter | '_' | ':' ) namechar*
+let nmtoken = namechar+
+
+let character = ['\009' '\010' '\013' '\032'-'\255'] (* TAB, LF, CR and every byte from space upwards *)
+
+let character_except_question_mark = (* '?' = '\063' *)
+ [ '\009' '\010' '\013' '\032'-'\062' '\064'-'\255' ]
+
+let character_except_right_angle_bracket = (* '>' = '\062' *)
+ [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\255' ]
+
+let character_except_minus = (* '-' = '\045' *)
+ [ '\009' '\010' '\013' '\032'-'\044' '\046'-'\255' ]
+
+let character_except_quot = (* '"' = '\034' *)
+ [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\255' ]
+
+let character_except_apos = (* '\'' = '\039' *)
+ [ '\009' '\010' '\013' '\032'-'\038' '\040'-'\255' ]
+
+(* pi_string: the text between the opening and the closing delimiter of a
+ * processing instruction, i.e. any sequence of characters that does not
+ * contain the closing delimiter (a question mark directly followed by a
+ * right angle bracket).
+ *
+ * A run of question marks inside the text must be followed by a character
+ * that is neither a question mark nor a right angle bracket.  Trailing
+ * question marks are allowed; the closing delimiter that follows pi_string
+ * in the lexer rules supplies the final question mark and the bracket.
+ *
+ * The previous definition accepted a second question mark as the character
+ * following a question mark, so two question marks followed by a right
+ * angle bracket were swallowed and the longest match ran on to a later
+ * closing delimiter.
+ *)
+let character_except_question_mark_rangle =   (* '?' = '\063', '>' = '\062' *)
+  [ '\009' '\010' '\013' '\032'-'\061' '\064'-'\255' ]
+
+let pi_string = character_except_question_mark*
+                ( '?'+ character_except_question_mark_rangle
+                       character_except_question_mark* )*
+                '?'*
+
+let comment_string = character_except_minus* (* text without two adjacent minus signs that does not end in a minus sign *)
+ ('-' character_except_minus+ )*
+
+let normal_character = (* written as a complement: everything except ampersand, left angle bracket, right bracket and the control characters other than TAB *)
+ [^ '&' '<' ']' '\000'-'\008' '\010'-'\031']
+
+let character_except_rbracket = (* ']' = '\093' *)
+ [ '\009' '\010' '\013' '\032'-'\092' '\094'-'\255' ]
+
+let character_except_rbracket_rangle = (* ']' = '\093', '>' = '\062' *)
+ [ '\009' '\010' '\013' '\032'-'\061' '\063'-'\092' '\094'-'\255' ]
+
+let cdata_string = (* after two or more right brackets the next character must be neither a right bracket nor a right angle bracket *)
+ character_except_rbracket*
+ ( "]" character_except_rbracket+ |
+ "]]" ']'* character_except_rbracket_rangle character_except_rbracket*
+ )*
+ ']'*
+(* cdata_string = char* - ( char* ']]>' char* ) *)
+
+let printable_character_except_amp_lt = (* TAB, CR and LF are not in this class either *)
+ (* '&' = '\038', '<' = '\060' *)
+ [ '\032'-'\037' '\039'-'\059' '\061'-'\255']
+
+let printable_character_except_amp_percent = (* TAB, CR and LF are not in this class either *)
+ (* '%' = '\037', '&' = '\038' *)
+ [ '\032'-'\036' '\039'-'\255']
+
+let character_except_special =
+ (* '<'=060, ']'=093, '"'=034, '\''=039 *)
+ [ '\009' '\010' '\013' '\032'-'\033' '\035'-'\038' '\040'-'\059'
+ '\061'-'\092' '\094'-'\255' ]
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert pxp_lex_aux.src
+
+#insert open_pxp_lex_aux_*.src
+#insert open_pxp_lex_misc_*.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* scan_document: lexer for the outermost structures of a document.
+ * Every action yields a pair: a token and a second component (Document,
+ * Document_type, Document_comment, Within_tag), presumably the lexer
+ * context in which scanning continues.
+ * Raises WF_error for markup or text that may not occur at this level and
+ * Netconversion.Malformed_code for input no other rule accepts.
+ *)
+
+rule scan_document = parse
+    "<?" pi_string "?>"
+      { let pi_text = Lexing.lexeme lexbuf in
+        (scan_pi pi_text scan_xml_pi, Document) }
+  | "<?"
+      { raise (WF_error "Illegal processing instruction") }
+  | "<!DOCTYPE"
+      { tok_Doctype__Document_type }
+  | "<!--"
+      { (Comment_begin, Document_comment) }
+  | "<!"
+      { raise
+          (WF_error
+             "Declaration either malformed or not allowed in this context") }
+  | '<' name
+      { let lexeme = Lexing.lexeme lexbuf in
+        (* drop the leading angle bracket to obtain the element name *)
+        let elem_name = String.sub lexeme 1 (String.length lexeme - 1) in
+        (Tag_beg (elem_name, dummy_entity), Within_tag) }
+  | "<"
+      { raise (WF_error "Illegal token") }
+  | ws+
+      { tok_Ignore__Document }
+  | eof
+      { tok_Eof__Document }
+  | character
+      { raise (WF_error "Content not allowed here") }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* scan_document_type: lexer used after the DOCTYPE keyword until the
+ * matching right angle bracket.  Yields pairs of a token and a second
+ * component, presumably the lexer context to continue in.
+ * The two kinds of quoted literals share one action, and so do the two
+ * unterminated-literal errors; the patterns concerned start with different
+ * characters, so grouping them does not affect which rule is selected.
+ *)
+
+rule scan_document_type = parse
+    name
+      { (Name (Lexing.lexeme lexbuf), Document_type) }
+  | ws+
+      { tok_Ignore__Document_type }
+  | ( '"' character_except_quot* '"' )
+  | ( "'" character_except_apos* "'" )
+      { let quoted = Lexing.lexeme lexbuf in
+        (* strip the opening and the closing quotation mark *)
+        let inner = String.sub quoted 1 (String.length quoted - 2) in
+        (Unparsed_string inner, Document_type) }
+  | '"' | "'"
+      { raise (WF_error "Cannot find the second quotation mark") }
+  | "["
+      { tok_Dtd_begin__Declaration }
+  | ">"
+      { tok_Doctype_rangle__Document }
+  | eof
+      { tok_Eof__Document_type }
+  | "&"
+      { raise (WF_error "References to general entities not allowed here") }
+  | "%"
+      { raise (WF_error "References to parameter entities not allowed here") }
+  | character
+      { raise (WF_error "Content not allowed here") }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+(* scan_dtd_string: used to determine the replacement text of internal
+ * entities.
+ *   - a parameter entity reference yields PERef, a general entity
+ *     reference yields ERef, each with the entity name
+ *   - a decimal or hexadecimal character reference yields CRef with the code
+ *   - CR LF, a lone CR and a lone LF yield CRef(-1), CRef(-2) and CRef(-3),
+ *     marker values (how they are consumed is not visible in this file)
+ *   - TAB and any other run of printable characters yield CharData
+ * Raises WF_error for a percent sign or an ampersand that does not start a
+ * reference, and Netconversion.Malformed_code for input that no other rule
+ * accepts.
+ *)
+
+rule scan_dtd_string = parse
+    '%' name ';'
+      { let s = Lexing.lexeme lexbuf in
+        PERef (String.sub s 1 (String.length s - 2)) }
+  | '%'
+      { (* The message names the escaped form the author has to use. *)
+        raise(WF_error("The character '%' must be written as '&#37;'")) }
+  | '&' name ';'
+      { let s = Lexing.lexeme lexbuf in
+        ERef (String.sub s 1 (String.length s - 2)) }
+  | "&#" ascii_digit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        CRef (int_of_string (String.sub s 2 (String.length s - 3))) }
+  | "&#x" ascii_hexdigit+ ";"
+      { let s = Lexing.lexeme lexbuf in
+        (* the 0x prefix makes int_of_string read the digits as hexadecimal *)
+        CRef (int_of_string ("0x" ^ String.sub s 3 (String.length s - 4))) }
+  | '&'
+      { raise(WF_error("The character '&' must be written as '&amp;'")) }
+  | '\013' '\010'
+      { CRef(-1) }
+  | '\013'
+      { CRef(-2) }
+  | '\010'
+      { CRef(-3) }
+  | '\009'
+      { CharData "\009" }
+  | printable_character_except_amp_percent+
+      { CharData (Lexing.lexeme lexbuf) }
+  | eof
+      { Eof }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* The remaining, smaller lexers *)
+
+(* scan_characters: checks that the whole remaining input consists of
+ * well-formed characters.  Returns () once the end of the input is reached
+ * and raises Netconversion.Malformed_code at the first position where no
+ * character matches.
+ *
+ * The rule re-enters itself after each run of valid characters.  The former
+ * first pattern (zero or more characters, without recursion) returned ()
+ * right after the first run, so anything following that run was never
+ * inspected by callers that invoke the rule once per string.
+ * Each step consumes the longest possible run, therefore the recursion is
+ * at most two levels deep.
+ *)
+rule scan_characters = parse
+    character+
+      { scan_characters lexbuf }
+  | eof
+      { () }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* scan_xml_pi: tokenizer that the other lexers hand to scan_pi (defined
+ * elsewhere) together with the text of a processing instruction.
+ * Yields Pro_name, Pro_eq, Pro_string or Pro_eof.  A name and an equals
+ * sign may be followed by whitespace; a quoted value must be followed by
+ * whitespace.  get_name_end is defined elsewhere; NOTE(review): presumably
+ * it returns the index just past the name -- confirm.
+ *)
+and scan_xml_pi = parse
+    name ws*
+      { let lexeme = Lexing.lexeme lexbuf in
+        let name_len = get_name_end lexeme 0 in
+        Pro_name (String.sub lexeme 0 name_len) }
+  | '=' ws*
+      { Pro_eq }
+  | "'" character_except_apos* "'" ws+
+      { let lexeme = Lexing.lexeme lexbuf in
+        (* position of the closing quote; the value sits in between *)
+        let closing = String.index_from lexeme 1 '\'' in
+        Pro_string (String.sub lexeme 1 (closing - 1)) }
+  | '"' character_except_quot* '"' ws+
+      { let lexeme = Lexing.lexeme lexbuf in
+        let closing = String.index_from lexeme 1 '"' in
+        Pro_string (String.sub lexeme 1 (closing - 1)) }
+  | "'" | '"'
+      { raise (WF_error "Cannot find the second quotation mark") }
+  | eof
+      { Pro_eof }
+  | character
+      { raise (WF_error "Illegal token or character") }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* scan_only_xml_decl: recognizes an XML declaration at the current
+ * position and passes it to scan_pi; yields Eof without consuming any
+ * input when there is none.
+ *)
+and scan_only_xml_decl = parse
+    "<?xml" ws+ pi_string "?>"
+      { let decl_text = Lexing.lexeme lexbuf in
+        scan_pi decl_text scan_xml_pi }
+  | ""
+      { Eof }
+
+(* scan_for_crlf: splits the input into line ends and the text between
+ * them.  CR LF, a lone CR and a lone LF all yield CharData with a single
+ * LF; any other run of characters is passed through unchanged.
+ *)
+and scan_for_crlf = parse
+    "\013\010" | '\013' | '\010'
+      { CharData "\n" }
+  | [^ '\010' '\013' ]+
+      { let text = Lexing.lexeme lexbuf in
+        CharData text }
+  | eof
+      { Eof }
+
+(* scan_content_comment: lexer for the inside of a comment that was opened
+ * in content.  The end delimiter yields Comment_end paired with Content;
+ * everything else stays paired with Content_comment.  The text of the
+ * comment is passed on as Comment_material.
+ *)
+and scan_content_comment = parse
+    "-->"
+      { (Comment_end, Content) }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | '-'
+      { (Comment_material "-", Content_comment) }
+  | character_except_minus+
+      { let text = Lexing.lexeme lexbuf in
+        (Comment_material text, Content_comment) }
+  | eof
+      { (Eof, Content_comment) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* scan_decl_comment: lexer for the inside of a comment that was opened in
+ * a declaration.  In declarations, comments are always thrown away: the
+ * material is reported as an empty string, whatever was matched.
+ *)
+
+and scan_decl_comment = parse
+    "-->"
+      { (Comment_end, Declaration) }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | '-' | character_except_minus+
+      { (Comment_material "", Decl_comment) }
+  | eof
+      { (Eof, Decl_comment) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* scan_document_comment: lexer for the inside of a comment that was opened
+ * at document level.  The end delimiter yields Comment_end paired with
+ * Document; everything else stays paired with Document_comment.  The text
+ * of the comment is passed on as Comment_material.
+ *)
+and scan_document_comment = parse
+    "-->"
+      { (Comment_end, Document) }
+  | "--"
+      { raise (WF_error "Double hyphens are illegal inside comments") }
+  | '-'
+      { (Comment_material "-", Document_comment) }
+  | character_except_minus+
+      { let text = Lexing.lexeme lexbuf in
+        (Comment_material text, Document_comment) }
+  | eof
+      { (Eof, Document_comment) }
+  | _
+      { raise Netconversion.Malformed_code }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/18 20:19:59 gerd
+ * Comments return different comment tokens.
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+(* scan_name_string: splits a string into names, name tokens, whitespace
+ * and single other characters.  The name rule precedes the nmtoken rule,
+ * so a lexeme that matches both is reported as Name.
+ *)
+rule scan_name_string = parse
+    name
+      { let text = Lexing.lexeme lexbuf in
+        Name text }
+  | ws+
+      { Ignore }
+  | nmtoken
+      { let text = Lexing.lexeme lexbuf in
+        Nametoken text }
+  | eof
+      { Eof }
+  | character
+      { let text = Lexing.lexeme lexbuf in
+        CharData text }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* scan_ignored_section: lexer for the body of an ignored conditional
+ * section.  Only the delimiters of nested conditional sections and the end
+ * of input get tokens of their own; comments, quoted literals and all
+ * other text yield tok_Ignore__Ignored.  Patterns that share this result
+ * are grouped; their relative order is unchanged.
+ *)
+and scan_ignored_section = parse
+    "<!["
+      { tok_Conditional_begin__Ignored }
+  | "]]>"
+      { tok_Conditional_end__Ignored }
+  | ( "<!--" comment_string "-->" )
+  | ( '"' character_except_quot* '"' )
+  | ( "'" character_except_apos* "'" )
+      { tok_Ignore__Ignored }
+  | eof
+      { tok_Eof__Ignored }
+  | character_except_special+
+  | '<' | ']' | '\'' | '"'
+      { tok_Ignore__Ignored }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+{
+ open Pxp_types
+ open Pxp_lexer_types
+
+#insert open_pxp_lex_aux_*.src
+#insert pxp_lex_aux.src
+
+}
+
+#insert pxp_lex_defs_*.def
+
+
+(* scan_within_tag: lexer for the inside of a tag.  Yields pairs of a token
+ * and a second component, presumably the lexer context to continue in:
+ * Within_tag until the tag is closed, Content afterwards (judging by the
+ * tok_ names).  Quoted attribute values are returned as Attval without
+ * their quotation marks.
+ * The two kinds of quoted values share one action, and so do the two
+ * unterminated-value errors; the patterns concerned start with different
+ * characters, so grouping them does not affect which rule is selected.
+ *)
+rule scan_within_tag = parse
+    ws+
+      { tok_Ignore__Within_tag }
+  | name
+      { let ident = Lexing.lexeme lexbuf in
+        (Name ident, Within_tag) }
+  | "="
+      { tok_Eq__Within_tag }
+  | ( '"' character_except_quot* '"' )
+  | ( "'" character_except_apos* "'" )
+      { let quoted = Lexing.lexeme lexbuf in
+        (* strip the opening and the closing quotation mark *)
+        let inner = String.sub quoted 1 (String.length quoted - 2) in
+        (Attval inner, Within_tag) }
+  | '"' | "'"
+      { raise (WF_error "Cannot find the second quotation mark") }
+  | ">"
+      { tok_Rangle__Content }
+  | "/>"
+      { tok_Rangle_empty__Content }
+  | eof
+      { tok_Eof__Within_tag }
+  | character
+      { raise (WF_error "Illegal inside tags") }
+  | _
+      { raise Netconversion.Malformed_code }
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/08/14 22:18:34 gerd
+ * Bad_character_stream -> Netconversion.Malformed_code
+ *
+ * Revision 1.2 2000/05/29 23:53:12 gerd
+ * Updated because Markup_* modules have been renamed to Pxp_*.
+ *
+ * Revision 1.1 2000/05/20 20:33:25 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+# make all: make bytecode executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+
+#----------------------------------------------------------------------
+# SRC is in link order; lexer.ml and parser.ml are generated (see the .mll.ml and .mly.ml rules below).
+SRC = ast.ml lexer.ml parser.ml generator.ml
+OBJ = $(SRC:.ml=.cmo)
+
+#----------------------------------------------------------------------
+
+
+.PHONY: all
+all: m2parsergen
+# clean removes build products, including the generated lexer.ml, parser.ml and parser.mli.
+.PHONY: clean
+clean:
+	rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa lexer.ml parser.ml \
+	      parser.mli
+# CLEAN is described at the top as the recursive variant; in this directory it only runs clean.
+.PHONY: CLEAN
+CLEAN: clean
+# distclean additionally removes editor backups, the dependency files and the executable.
+.PHONY: distclean
+distclean: clean
+	rm -f *~ depend depend.pkg m2parsergen a.out x.ml
+
+#----------------------------------------------------------------------
+# general rules:
+# ROPTIONS is not set in this file; NOTE(review): presumably supplied by the caller -- confirm.
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+# NOTE(review): -p makes ocamlopt compile with profiling support; only the .ml.cmx rule uses OCAMLOPT -- confirm it is wanted.
+#----------------------------------------------------------------------
+# depend is included at the end of this file and is regenerated from SRC by the rule below.
+depend: $(SRC)
+	$(OCAMLDEP) $(SRC) >depend
+
+m2parsergen: $(OBJ)
+	$(OCAMLC) -o m2parsergen $(OBJ)
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+	$(OCAMLOPT) -c $<
+
+.ml.cmo:
+	$(OCAMLC) -c $<
+
+.mli.cmi:
+	$(OCAMLC) -c $<
+
+.mll.ml:
+	ocamllex $<
+
+.mly.ml:
+	ocamlyacc $<
+
+include depend
--- /dev/null
+----------------------------------------------------------------------
+m2parsergen
+----------------------------------------------------------------------
+
+This is a parser generator for top-down (recursive-descent) parsers.
+The input file must be structured as follows:
+
+---------------------------------------- Begin of file
+
+<OCAML TEXT ("preamble")>
+
+%%
+
+<DECLARATIONS>
+
+%%
+
+<RULES>
+
+%%
+
+<OCAML TEXT ("postamble")>
+
+---------------------------------------- End of file
+
+The two-character combination %% separates the various sections. The
+text before the first %% and after the last %% will be copied verbatim
+to the output file.
+
+Within the declarations and rules sections you must use /* ... */ as
+comment braces.
+
+There are two types of declarations:
+
+%token Name
+
+declares that Name is a token without associated value, and
+
+%token <> Name
+
+declares that Name is a token with associated value (i.e. Name x).
+
+In contrast to ocamlyacc, you need not specify a type. This is a
+fundamental difference, because m2parsergen will not generate a type
+declaration for a "token" type; you must do this yourself.
+
+You need not declare start symbols; every grammar rule may be used
+as a start symbol.
+
+The rules look like:
+
+name_of_rule(arg1, arg2, ...):
+ label1:symbol1 label2:symbol2 ... {{ CODE }}
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+...
+| label1:symbol1 label2:symbol2 ... {{ CODE }}
+
+The rules may have arguments (note that you must write the
+parentheses, even if the rule does not have arguments). Here, arg1,
+arg2, ... are the formal names of the arguments; you may refer to them
+in OCaml code.
+
+Furthermore, the symbols may have labels (you can leave the labels
+out). You can refer to the value associated with a symbol by its
+label, i.e. there is an OCaml variable with the same name as the label
+prescribes, and this variable contains the value.
+
+The OCaml code must be embraced by {{ and }}, and these separators
+must not occur within the code.
+
+EXAMPLE:
+
+prefix_term():
+ Plus_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+ {{ v1 + v2 }}
+| Times_symbol Left_paren v1:prefix_term() Comma v2:prefix_term() Right_paren
+ {{ v1 * v2 }}
+| n:Number
+ {{ n }}
+
+As you can see in the example, you must pass values for the arguments
+if you call non-terminal symbols (here, the argument list is empty: ()).
+
+The generated parsers behave as follows:
+
+- A rule is applicable to a token sequence if the first token is
+ matched by the rule.
+
+ In the example: prefix_term is applicable if the first token of a
+ sequence is either Plus_symbol, Times_symbol, or Number.
+
+- One branch of the applicable rule is selected: it is the first
+ branch that matches the first token. THE OTHER TOKENS DO NOT HAVE
+ ANY EFFECT ON BRANCH SELECTION!
+
+ For instance, in the following rule the second branch is never
+ selected, because only the A is used to select the branch:
+
+ a():
+ A B {{ ... }}
+ | A C {{ ... }}
+
+- Once a branch is selected, it is checked whether the branch matches
+ the token sequence. If this check succeeds, the code section of the
+ branch is executed, and the resulting value is returned to the
+ caller.
+ If the check fails, the exception Parsing.Parse_error is raised.
+ Normally, this exception is not caught, and will force the parser
+ to stop.
+
+ The check in detail:
+
+ If the rule demands a terminal, there must be exactly this
+ terminal at the corresponding location in the token sequence.
+
+ If the rule demands a non-terminal, it is checked whether the rule
+ for this non-terminal is applicable. If so, the branch
+ is selected, and recursively checked. If the rule is not applicable,
+ the check fails immediately.
+
+- THERE IS NO BACKTRACKING!
+
+ Note that the following works (but the construction is resolved at
+ generation time):
+
+ rule1():
+ rule2() A B ... {{ ... }}
+
+ rule2():
+ C {{ ... }}
+ | D {{ ... }}
+
+ In this case, the (only) branch of rule1 is selected if the next
+ token is C or D.
+
+---
+
+
+
+*** Options and repetitions ***
+
+Symbols can be tagged as being optional, or to occur repeatedly:
+
+rule():
+ Name whitespace()* Question_mark?
+
+- "*": The symbol matches zero or more occurrences.
+
+- "?": The symbol matches zero or one occurrence.
+
+This is done as follows:
+
+- terminal*: The maximum number of consecutive tokens <terminal> are
+ matched.
+- non-terminal*: The maximum number of the subsequences matching
+ <non-terminal> are matched. Before another
+ subsequence is matched, it is checked whether the
+ rule for <non-terminal> is applicable. If so, the
+ rule is invoked and must succeed (otherwise Parsing.
+ Parse_error). If not, the loop is exited.
+
+- terminal?: If the next token is <terminal>, it is matched. If not,
+ no token is matched.
+
+- non-terminal?: It is checked whether the rule for <non-terminal>
+ is applicable. If so, the rule is invoked, and
+ matches a sequence of tokens. If not, no token is
+ matched.
+
+You may refer to repeated or optional symbols by labels. In this case,
+the label is associated with lists of values, or optional values,
+respectively:
+
+rule():
+ A lab:other()* lab':unlikely()?
+ {{ let n = List.length lab in ...
+ match lab' with
+ None -> ...
+ | Some v -> ...
+ }}
+
+A different scheme is applied if the symbol is a token without
+associated value (%token Name, and NOT %token <> Name):
+
+rule():
+ A lab:B* lab':C?
+
+Here, "lab" becomes an integer variable counting the number of Bs, and
+"lab'" becomes a boolean variable denoting whether there is a C or not.
+
+
+*** Early let-binding ***
+
+You may put some OCaml code directly after the first symbol of a
+branch:
+
+rule():
+ A $ {{ let-binding }} C D ... {{ ... }}
+
+The code brace {{ let-binding }} must be preceded by a dollar
+sign. You can put "let ... = ... in" statements into this brace:
+
+rule1():
+ n:A $ {{ let twice = 2 * n in }} rule2(twice) {{ ... }}
+
+This code is executed once the branch is selected.
+
+
+*** Very early let-binding ***
+
+This is also possible:
+
+rule():
+ $ {{ CODE }}
+ A
+ ...
+
+The CODE is executed right when the branch is selected, and before
+anything else happens. (Only for hacks!)
+
+
+
+*** Computed rules ***
+
+rule():
+ A $ {{ let followup = ... some function ... in }} [ followup ]()
+ {{ ... }}
+
+Between [ and ], you can refer to the O'Caml name of *any* function.
+Here, the function "followup" is bound in the let-binding.
+
+
+*** Error handling ***
+
+If a branch is already selected, but the check fails whether the other
+symbols of the branch match, it is possible to catch the resulting
+exception and to find out at which position the failure has occurred.
+
+rule():
+ x:A y:B z:C {{ ... }} ? {{ ERROR-CODE }}
+
+After a question mark, it is allowed to append another code
+brace. This code is executed if the branch check fails (but not if the
+branch is not selected nor if no branches are selected). The string
+variable !yy_position contains the label of the symbol that caused the
+failure (or it contains the empty string if the symbol does not have a
+label).
+
+Example:
+
+rule():
+ x:A y:B z:C {{ print_endline "SUCCESS" }} ? {{ print_endline !yy_position }}
+
+If the token sequence is A B C, "SUCCESS" will be printed. If the
+sequence is A C, the second symbol fails, and "y" will be printed. If
+the sequence is A B D, the third symbol fails, and "z" will be
+printed. If the sequence is B, the rule will never be selected because
+it is not applicable.
+
+
+
+*** Error recovery ***
+
+You may call the functions yy_current, yy_get_next, or one of the
+parse_* functions in the error brace to recover from the error
+(e.g. to move ahead until a certain token is reached). See below.
+
+
+
+*** How to call the parser ***
+
+The rules are rewritten into an OCaml let-binding:
+
+let rec parse_<rule1> ... = ...
+ and parse_<rule2> ... = ...
+ ...
+ and parse_<ruleN> ... = ...
+in
+
+i.e. there are lots of functions, and the names of the functions are
+"parse_" plus the names of the rules. You can call every function.
+
+The first two arguments of the functions have a special meaning; the
+other arguments are the arguments coming from the rule description:
+
+rule(a,b):
+ ...
+
+===>
+
+let rec parse_rule yy_current yy_get_next a b = ...
+
+The first argument, yy_current, is a function that returns the current
+token. The second argument, yy_get_next, is a function that switches
+to the next token, and returns it.
+
+If the tokens are stored in a list, this may be a definition:
+
+let input = ref [ Token1; Token2; ... ] in
+let yy_current() = List.hd !input in
+let yy_get_next () =
+ input := List.tl !input;
+ List.hd !input
+
+When you call one of the parser functions, the current token must
+already be loaded, i.e. yy_current returns the first token to match by
+the function.
+
+After the function has returned, the current token is the token
+following the sequence of tokens that have been matched by the
+function.
+
+The function returns the value computed by the OCaml code brace of the
+rule (or the value of the error brace).
+
+If the rule is not applicable, the exception Not_found is raised.
+
+If the rule is applicable, but it does not match, the exception
+Parsing.Parse_error is raised.
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* A token declaration from the declarations section of the input file. *)
+type declaration =
+    D_token of string         (* D_token name: a token without
+                               * associated value (is_typed = false)
+                               *)
+  | D_typed_token of string   (* D_typed_token name: a token with
+                               * associated value (is_typed = true)
+                               *)
+;;
+
+(* A symbol occurring in a branch of a rule. The last component is always
+ * the optional label of the symbol.
+ *)
+type symbol =
+    U_symbol of (string * string option)  (* U_symbol(token, label):
+                                           * a token
+                                           *)
+  | L_symbol of (string * string list * string option)
+                                          (* L_symbol(rule_name, args, label):
+                                           * the invocation of another rule
+                                           *)
+  | L_indirect of (string * string list * string option)
+                                          (* L_indirect(ml_name, args, label):
+                                           * the invocation of the OCaml
+                                           * function ml_name
+                                           *)
+;;
+
+
+(* How often a symbol of a pattern may occur. *)
+type modifier =
+    Exact        (* the symbol as such, without modifier *)
+  | Option       (* optional symbol *)
+  | Repetition   (* repeated symbol *)
+;;
+
+
+(* A symbol together with its modifier. *)
+type pattern =
+    { pat_symbol : symbol;       (* the symbol to match *)
+      pat_modifier : modifier;   (* exact, optional, or repeated *)
+    }
+
+
+(* One branch (alternative) of a rule. The code triples are handled by the
+ * generator as (code, line, column); line is used for the "# line"
+ * directive that is output before the code, and column for the number of
+ * spaces the code is indented by.
+ *)
+type branch =
+    { branch_selector : symbol;
+        (* the first symbol; it selects the branch *)
+      branch_early_code : (string * int * int);
+        (* code output at the very beginning of the branch; "" if missing *)
+      branch_binding_code : (string * int * int);
+        (* the "let-CODE" output after the first symbol; "" if missing *)
+      branch_pattern : pattern list;
+        (* the symbols following the first symbol *)
+      branch_result_code : (string * int * int);
+        (* the "CODE" computing the result of the branch *)
+      branch_error_code : (string * int * int) option;
+        (* the code of the "?" clause, if any *)
+    }
+;;
+
+(* A grammar rule. The generator translates it into the function
+ * parse_<rule_name>.
+ *)
+type rule =
+    { rule_name : string;             (* Name of the rule *)
+      rule_arguments : string list;   (* List of names *)
+      rule_branches : branch list;    (* The branches, in the order in
+                                       * which they are tried
+                                       *)
+    }
+;;
+
+(* The parsed input: the token declarations and the rules. *)
+type text =
+    { text_decls : declaration list;   (* the token declarations *)
+      text_rules : rule list;          (* the grammar rules *)
+    }
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Parser
+open Ast
+
+(* Overall scheme:
+ *
+ * The rules are translated to:
+ *
+ * let rec parse_<rule1> ... = ...
+ * and parse_<rule2> ... = ...
+ * and ...
+ * and parse_<ruleN> ... = ...
+ * in
+ *
+ * Every rule has at least two arguments: 'current' and 'get_next'.
+ * 'current()' is the token that should match the first symbol of the
+ * rule. 'get_next()' returns the next token.
+ *
+ * The rules may have further user arguments; these are the next arguments
+ * in turn.
+ *
+ * The rules return the user value. After they have returned to the caller
+ * the current token is the token that follows the sequence of tokens
+ * matching the rule.
+ *
+ * The rules will raise:
+ * - Not_found if the first token does not match
+ * - Parsing.Parse_error if the rest does not match.
+ *
+ * Rule scheme:
+ *
+ * rule(arg1,arg2,...):
+ * (l1:x1)
+ * {{ let-CODE }}
+ * (l2:y2(name1,...)) y3 ...
+ * {{ CODE }}
+ * ? {{ ?-CODE }}
+ * | x2 ...
+ * | ...
+ * | xN
+ *
+ * let parse_<rule> current get_next arg1 arg2 ... =
+ * match current() with
+ * S(x1) -> ...
+ * | S(x2) -> ...
+ * | ...
+ * | S(xN) -> ...
+* | _ -> raise Not_found
+ *
+ * Here, S(xi) denotes the set of tokens matched by xi without all tokens
+ * already matched by x1 to x(i-1). (If S(xi) = empty, a warning is printed,
+ * and this branch of the rule is omitted.)
+ *
+ * S(xi) may be a set because xi may be a reference to another rule. In this
+ * case, S(xi) is based on the set of tokens that match the first symbol of
+ * the other rule. (In general, S(xi) must be computed recursively.)
+ *
+ * If the "?" clause is present, every branch is embraced by the following:
+ *
+ * let position = ref "<Label of x1>" in
+ * ( try ...
+ * with Parsing.Parse_error -> ( <<?-CODE>> )
+ * )
+ *
+ * Next: The "..." is
+ *
+ * OPTIONAL: let <l1> = parse_<rule(x1)> in
+ * <<let-CODE>>
+ * M(y1)
+ * M(y2)
+ * ...
+ * M(yN)
+ * <<CODE>>
+ *
+ * If x1 is a rule invocation, it is now parsed, and the result is bound
+ * to a variable.
+ *
+ * Note: After x1 has matched, the Caml variable <l1> must be either
+ * bound to the result of the sub parsing, or to the value associated
+ * with the token (if any). The latter is already done in the main
+ * "match" statement, i.e. "match ... with S(x1) -> ..." is actually
+ * "match ... with Token1 <l1> -> ...".
+ *
+ * Note: After calling parse_<rule(x1)> the exception Not_found is NEVER
+ * converted to Parsing.Parse_error. It is simply not possible that this
+ * happens.
+
+ * For every remaining symbol yi of the rule, a matching statement M(yi)
+ * is produced. These statements have the form:
+ *
+ * OPTIONAL: position := "<Label of yi>";
+ * CASE: yi is a token without associated value
+ * let yy_i = get_next() OR current() in
+ * if yy_i <> Token(yi) then raise Parsing.Parse_error;
+ * CASE: yi is a token with value
+ * let yy_i = get_next() OR current() in
+ * let <li> = match yy_i with Token x -> x | _ -> raise Parsing.Parse_error
+ * in
+ * CASE: yi is a rule invocation
+ * OPTIONAL: let _ = get_next() in
+ * let <li> = try parse_<rule(yi)>
+ * with Not_found -> raise Parsing.Parse_error in
+ *
+ * yy_i is get_next() if y(i-1) was a token, and yy_i is current() if
+ * y(i-1) was a rule invocation.
+ *
+ * Repetitions:
+ *
+ * If yi = (yi')*:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * while true do
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * done
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The list of results must be bound to <li>!
+ *
+ * let yy_list = ref [] in
+ * ( try
+ * while true do
+ * let yy_first = M(yi') (with some modifications) in
+ * yy_list := yy_first :: !yy_list;
+ * done
+ * with Not_found -> ()
+ * );
+ * let <li> = List.rev !yy_list in
+ *
+ * Note that this scheme minimizes stack and heap allocations.
+ *
+ * Options:
+ *
+ * If yi = (yi')?:
+ *
+ * CASE no label given:
+ *
+ * ( try
+ * M(yi') with the modification that top-level mismatches raise
+ * Not_found instead of Parsing.Parse_error
+ * with Not_found -> ()
+ * )
+ *
+ * CASE a label <li> is given: The optional result must be bound to <li>!
+ *
+ * let <li> =
+ * try
+ * Some( M(yi') (with some modifications) )
+ * with Not_found -> None
+ * in
+ *)
+
+
+let lookup_rule tree name =
+  (* Returns the rule of the grammar 'tree' that is called 'name'. If there
+   * is no such rule, the function fails (exception Failure).
+   *)
+  let has_name r = (r.rule_name = name) in
+  try List.find has_name tree.text_rules
+  with Not_found -> failwith ("Rule `" ^ name ^ "' not found")
+;;
+
+
+let is_typed tree name =
+  (* Returns true if the token 'name' is declared as a token with
+   * associated value, and false if it is declared as a token without value.
+   * Fails (exception Failure) if the token is not declared at all.
+   *)
+  let declares d =
+    match d with
+        D_token n -> n = name
+      | D_typed_token n -> n = name
+  in
+  let found =
+    try List.find declares tree.text_decls
+    with Not_found -> failwith ("Token `" ^ name ^ "' not found")
+  in
+  match found with
+      D_typed_token _ -> true
+    | D_token _ -> false
+;;
+
+
+let label_of_symbol tree sym =
+  (* Returns the label of the symbol, if any. The label is returned for
+   * all kinds of symbols; 'tree' is currently not inspected (labels of
+   * tokens without value are NOT suppressed).
+   *)
+  match sym with
+      U_symbol (_, lab)
+    | L_symbol (_, _, lab)
+    | L_indirect (_, _, lab) -> lab
+;;
+
+
+let is_untyped_U_symbol tree sym =
+  (* Returns true iff the symbol is a token, and this token is declared
+   * without associated value. Rule invocations are never "untyped".
+   *)
+  match sym with
+      L_symbol _ | L_indirect _ -> false
+    | U_symbol (tok, _) -> not (is_typed tree tok)
+;;
+
+
+
+let rec set_of_list l =
+  (* Returns the members of l without duplicates. If a member occurs
+   * several times, only its last occurrence is kept.
+   *)
+  match l with
+      [] -> []
+    | x :: rest when List.mem x rest -> set_of_list rest
+    | x :: rest -> x :: set_of_list rest
+;;
+
+
+let selector_set_of_rule tree name =
+  (* Computes the set of tokens that may begin the rule 'name', i.e. the
+   * tokens matching the first symbol of one of its branches. If a branch
+   * begins with the invocation of another rule, the tokens beginning that
+   * rule are taken. Fails if a branch begins with an indirect call.
+   *)
+
+  let rec collect visited rule =
+    (* 'visited' contains the rules that are currently being expanded; they
+     * contribute nothing when met again, such that cycles terminate.
+     *)
+    if List.mem rule visited then
+      []
+    else begin
+      let r = lookup_rule tree rule in
+      let first_tokens branch =
+        match branch.branch_selector with
+            U_symbol (tok_name,_) ->
+              [ tok_name ]
+          | L_symbol (rule_name, _, _) ->
+              collect (rule :: visited) rule_name
+          | L_indirect (_, _, _) ->
+              failwith("The first symbol in rule `" ^ rule ^
+                       "' is an indirect call; this is not allowed")
+      in
+      List.flatten (List.map first_tokens r.rule_branches)
+    end
+  in
+  set_of_list (collect [] name)
+;;
+
+
+let output_code_location b file_name (_, line, column) =
+  (* Appends a "# <line> "<file_name>"" directive to the buffer b, followed
+   * by 'column' spaces, such that code appended afterwards is attributed to
+   * this position of the input file. The first component of the triple
+   * (the code) is ignored.
+   *)
+  Buffer.add_char b '\n';
+  Buffer.add_string b (Printf.sprintf "# %d \"%s\"\n" line file_name);
+  Buffer.add_string b (String.make column ' ')
+;;
+
+
+(* The line number used in the next "<Generated Code>" directive. It has
+ * no relation to a real file; it is only increased after every piece of
+ * user code.
+ *)
+let phantasy_line = ref 100000;;
+
+let output_code b file_name triple =
+  (* Appends the user code of the triple (code, line, column) to the buffer
+   * b. The code is preceded by a directive pointing to its position in the
+   * input file, and followed by a directive switching to the pseudo file
+   * "<Generated Code>". Nothing is output if the code is empty.
+   *)
+  let (code, _, _) = triple in
+  if code = "" then
+    ()
+  else begin
+    output_code_location b file_name triple;
+    Buffer.add_string b code;
+    Buffer.add_string b "\n# ";
+    Buffer.add_string b (string_of_int !phantasy_line);
+    Buffer.add_string b " \"<Generated Code>\"\n";
+    phantasy_line := !phantasy_line + 10000
+  end
+;;
+
+
+let process_branch b file_name tree branch =
+  (* Appends to the buffer b the OCaml expression that implements one
+   * branch of a rule, i.e. the right-hand side of one case of the
+   * generated "match yy_current() with" statement.
+   *
+   * b:         the buffer receiving the generated code
+   * file_name: the file name used in the line directives for user code
+   * tree:      the whole grammar (needed to look up rules and tokens)
+   * branch:    the branch to translate
+   *
+   * The function fails (exception Failure) if a called rule does not
+   * exist or is called with the wrong number of arguments, if a token is
+   * not declared, or if the branch begins with an indirect call.
+   *
+   * Convention of the generated code: After a token with modifier Exact
+   * has been matched, this token is still the current token, and the next
+   * symbol must first advance (previous_was_token = true). After all other
+   * symbols the current token is already the token to check next.
+   *)
+
+  let add s = Buffer.add_string b s in
+
+  let add_binder lab =
+    (* The variable to bind: the label, or "_" if there is no label *)
+    match lab with
+        None -> add "_"
+      | Some l -> add l
+  in
+
+  let add_token_source previous_was_token =
+    (* The expression returning the token to check *)
+    add (if previous_was_token then "yy_get_next()" else "yy_current()")
+  in
+
+  let add_skip () =
+    (* The statement that advances to the next token *)
+    add "ignore(yy_get_next());\n"
+  in
+
+  let is_terminal sym =
+    match sym with
+        U_symbol(_,_) -> true
+      | L_symbol(_,_,_) -> false
+      | L_indirect(_,_,_) -> false
+  in
+
+  let make_invocation fn_name args lab allow_not_found =
+    (* Produces: let <label> = fn_name yy_current yy_get_next args in
+     * If not allow_not_found, the exception Not_found is caught and
+     * changed into Parsing.Parse_error.
+     *)
+    add "let ";
+    add_binder lab;
+    add " = ";
+    if not allow_not_found then add "try ";
+    add fn_name;
+    add " yy_current yy_get_next";
+    List.iter (fun a -> add " "; add a) args;
+    if not allow_not_found then
+      add " with Not_found -> raise Parsing.Parse_error";
+    add " in\n"
+  in
+
+  let make_rule_invocation called_rule args lab allow_not_found =
+    (* Like make_invocation for the function parse_<called_rule>, but it is
+     * first checked that the rule exists and that the number of arguments
+     * is right.
+     *)
+    let r = lookup_rule tree called_rule in
+    if List.length r.rule_arguments <> List.length args then
+      failwith("Calling rule `" ^ called_rule ^ "' with the wrong number of arguments!");
+    make_invocation ("parse_" ^ called_rule) args lab allow_not_found
+  in
+
+  let process_symbol sym previous_was_token allow_not_found =
+    (* Produces the statement matching a single occurrence of sym. A
+     * mismatch raises Not_found if allow_not_found, and
+     * Parsing.Parse_error otherwise.
+     *)
+    let mismatch =
+      if allow_not_found then "Not_found" else "Parsing.Parse_error" in
+    match sym with
+        U_symbol(tok, lab) ->
+          (* Distinguish between simple tokens and typed tokens *)
+          if is_typed tree tok then begin
+            (* Typed token: bind the value of the token *)
+            add "let ";
+            add_binder lab;
+            add " = match ";
+            add_token_source previous_was_token;
+            add " with ";
+            add tok;
+            add " x -> x | _ -> raise ";
+            add mismatch;
+            add " in\n"
+          end
+          else begin
+            (* Simple token: only compare *)
+            add "if (";
+            add_token_source previous_was_token;
+            add ") <> ";
+            add tok;
+            add " then raise ";
+            add mismatch;
+            add ";\n"
+          end
+      | L_symbol(called_rule, args, lab) ->
+          if previous_was_token then add_skip();
+          make_rule_invocation called_rule args lab allow_not_found
+      | L_indirect(ml_name, args, lab) ->
+          if previous_was_token then add_skip();
+          make_invocation ml_name args lab allow_not_found
+  in
+
+  let process_pattern (current_position, previous_was_token) pat =
+    let sym = pat.pat_symbol in
+
+    (* Assign "yy_position" if necessary. This is only done if there is an
+     * error clause that could read the variable.
+     *)
+    let new_position =
+      if branch.branch_error_code <> None then begin
+        match label_of_symbol tree sym with
+            Some l -> l
+          | None -> ""
+      end
+      else ""
+    in
+    if new_position <> current_position then begin
+      add "yy_position := \"";
+      add new_position;
+      add "\";\n";
+    end;
+
+    let this_is_token = is_terminal sym && pat.pat_modifier = Exact in
+
+    let untyped_token =
+      (* Some tok if sym is a token without associated value *)
+      match sym with
+          U_symbol(t,_) -> if is_typed tree t then None else Some t
+        | L_symbol(_,_,_) -> None
+        | L_indirect(_,_,_) -> None
+    in
+
+    (* First distinguish between Exact, Option, and Repetition: *)
+    begin match pat.pat_modifier with
+        Exact ->
+          process_symbol sym previous_was_token false
+      | Option ->
+          begin match (label_of_symbol tree sym, untyped_token) with
+              (None, Some tok) ->
+                (* CASE: optional token without value and without label.
+                 * OPTIMIZATION: a simple comparison is sufficient.
+                 *)
+                add "if ";
+                add_token_source previous_was_token;
+                add " = ";
+                add tok;
+                add " then ignore(yy_get_next());\n"
+            | (None, None) ->
+                (* CASE: optional symbol without label, general case *)
+                add "( try (";
+                process_symbol sym previous_was_token true;
+                (* Only a matched token must be skipped. An invoked rule
+                 * has already moved to the token following its match;
+                 * advancing again would lose that token.
+                 *)
+                if is_terminal sym then add_skip() else add "();\n";
+                add ") with Not_found -> ());\n"
+            | (Some l, Some _) ->
+                (* CASE: optional token without value, with label: the
+                 * label becomes a boolean variable
+                 *)
+                add "let ";
+                add l;
+                add " = try (";
+                process_symbol sym previous_was_token true;
+                add ");\n";
+                add_skip();
+                add "true with Not_found -> false in\n"
+            | (Some l, None) ->
+                (* CASE: optional symbol with value and label: the label
+                 * becomes an optional value
+                 *)
+                add "let ";
+                add l;
+                add " = try let yy_tok = Some(";
+                process_symbol sym previous_was_token true;
+                add l;
+                add ") in\n";
+                if is_terminal sym then add_skip();
+                add "yy_tok with Not_found -> None in\n"
+          end
+      | Repetition ->
+          begin match (label_of_symbol tree sym, untyped_token) with
+              (None, Some tok) ->
+                (* CASE: repeated token without value and without label.
+                 * OPTIMIZATION: the loop becomes very simple.
+                 *)
+                if previous_was_token then begin
+                  (* Optimized case I *)
+                  add "while yy_get_next() = ";
+                  add tok;
+                  add " do () done;\n"
+                end
+                else begin
+                  (* Optimized case II *)
+                  add "if yy_current() = ";
+                  add tok;
+                  add " then (";
+                  add "while yy_get_next() = ";
+                  add tok;
+                  add " do () done);\n"
+                end
+            | (None, None) ->
+                (* CASE: repeated symbol without label, general case *)
+                if previous_was_token then add_skip();
+                add "( try while true do (";
+                process_symbol sym false true;
+                if is_terminal sym then add_skip() else add "();\n";
+                add ") done with Not_found -> ());\n"
+            | (Some l, Some _) ->
+                (* CASE: repeated token without value, with label: the
+                 * label becomes an integer variable
+                 *)
+                if previous_was_token then add_skip();
+                add "let yy_counter = ref 0 in\n";
+                add "( try while true do \n";
+                process_symbol sym false true;
+                add "incr yy_counter;\n";
+                add_skip();
+                add "done with Not_found -> ());\n";
+                add "let ";
+                add l;
+                add " = !yy_counter in\n"
+            | (Some l, None) ->
+                (* CASE: repeated symbol with value and label: the label
+                 * becomes the list of values
+                 *)
+                if previous_was_token then add_skip();
+                add "let yy_list = ref [] in\n";
+                add "( try while true do \n";
+                process_symbol sym false true;
+                add "yy_list := ";
+                add l;
+                add " :: !yy_list;\n";
+                if is_terminal sym then add_skip();
+                add "done with Not_found -> ());\n";
+                add "let ";
+                add l;
+                add " = List.rev !yy_list in\n"
+          end
+    end;
+
+    (* Continue: *)
+    (new_position, this_is_token)
+  in
+
+
+  let process_inner_branch current_position =
+    (* If there is "early code", run this now: *)
+    output_code b file_name branch.branch_early_code;
+    add "\n";
+
+    (* If the first symbol is a rule invocation, call the corresponding
+     * parser function now.
+     *)
+    let previous_was_token =
+      begin match branch.branch_selector with
+          U_symbol(_,_) ->
+            true
+        | L_symbol(called_rule, args, lab) ->
+            make_rule_invocation called_rule args lab true;
+            false
+        | L_indirect(_,_,_) ->
+            failwith("The first symbol in some rule is an indirect call; this is not allowed")
+      end
+    in
+
+    (* Now output the "let-CODE". *)
+    output_code b file_name branch.branch_binding_code;
+    add "\n";
+
+    (* Process the other symbols in turn: *)
+    let (_, previous_was_token') =
+      List.fold_left
+        process_pattern
+        (current_position, previous_was_token)
+        branch.branch_pattern
+    in
+
+    (* Special case:
+     *
+     * If previous_was_token', we must invoke yy_get_next one more time.
+     * This is deferred until "CODE" is executed to give this code
+     * the chance to make the next token available (in XML, the next token
+     * might come from a different entity, and "CODE" must switch to this
+     * entity).
+     *)
+
+    (* Now output "CODE": *)
+    add "let result = \n";
+    output_code b file_name branch.branch_result_code;
+    add "\nin\n";
+
+    if previous_was_token' then
+      add "ignore(yy_get_next());\nresult\n"
+    else
+      add "result\n"
+  in
+
+  (* If we have a ? clause, generate now the "try" statement *)
+  match branch.branch_error_code with
+      None ->
+        add "( ";
+        process_inner_branch "";
+        add " )"
+    | Some code ->
+
+        (* let yy_position = ref "<label>" in *)
+
+        let current_position =
+          match label_of_symbol tree branch.branch_selector with
+              Some l when not (is_terminal branch.branch_selector) -> l
+            | _ -> ""
+        in
+        add "let yy_position = ref \"";
+        add current_position;
+        add "\" in\n";
+
+        (* The "try" statement: *)
+
+        add "( try (\n";
+
+        process_inner_branch current_position;
+
+        add "\n) with Parsing.Parse_error -> (\n";
+        output_code b file_name code;
+        add "\n))\n"
+;;
+
+
+let process b file_name tree =
+  (* Appends the parser functions for all rules of the grammar to the
+   * buffer b:
+   *
+   * let rec parse_<rule1> yy_current yy_get_next args = match ... with ...
+   *     and parse_<rule2> ...
+   *  in
+   *
+   * Branches that can never be selected because their tokens are already
+   * matched by a previous branch cause a warning on stderr.
+   *)
+  let add s = Buffer.add_string b s in
+
+  let warn_hidden rule_name tok =
+    prerr_endline("WARNING: In rule `" ^ rule_name ^
+                  "': Match for token `" ^
+                  tok ^ "' hidden by previous match")
+  in
+
+  let process_rule keyword r =
+    (* keyword is "let rec " for the first rule, and "and " for the others.
+     * The keyword for the following rule is returned.
+     *)
+
+    (* Generate the function header: *)
+
+    add keyword;
+    add "parse_";
+    add r.rule_name;
+    add " yy_current yy_get_next";
+    List.iter (fun arg -> add " "; add arg) r.rule_arguments;
+    add " =\n";
+
+    (* Generate the "match" statement: *)
+
+    add "match yy_current() with\n";
+
+    (* matched: The set of already matched tokens *)
+    let matched = ref [] in
+
+    let finish_case branch new_tokens =
+      (* To be called after the pattern of the case has been output *)
+      add " -> ";
+      process_branch b file_name tree branch;
+      add "\n";
+      matched := new_tokens @ !matched
+    in
+
+    let rec add_alternatives separator toks =
+      match toks with
+          [] -> ()
+        | tok :: toks' ->
+            add separator;
+            add tok;
+            if is_typed tree tok then add " _";
+            add_alternatives " | " toks'
+    in
+
+    let process_case branch =
+      match branch.branch_selector with
+          U_symbol(tok, lab) ->
+            (* A simple token *)
+            if List.mem tok !matched then
+              warn_hidden r.rule_name tok
+            else begin
+              (* The value of a typed token is bound to the label *)
+              let binder =
+                if is_typed tree tok then
+                  (match lab with
+                       None -> " _"
+                     | Some l -> " " ^ l)
+                else
+                  ""
+              in
+              add "| ";
+              add tok;
+              add binder;
+              finish_case branch [ tok ]
+            end
+        | L_symbol(called_rule, _, _) ->
+            (* An invocation of a rule: the case matches all tokens that
+             * may begin the rule and that are not yet matched
+             *)
+            let is_new tok =
+              let hidden = List.mem tok !matched in
+              if hidden then warn_hidden r.rule_name tok;
+              not hidden
+            in
+            let candidates = selector_set_of_rule tree called_rule in
+            let fresh = List.filter is_new candidates in
+            if fresh <> [] then begin
+              add "| ( ";
+              add_alternatives "" fresh;
+              add ")";
+              finish_case branch fresh
+            end
+        | L_indirect(_, _, _) ->
+            (* An invocation of an indirect rule *)
+            failwith("The first symbol in rule `" ^ r.rule_name ^
+                     "' is an indirect call; this is not allowed")
+    in
+    List.iter process_case r.rule_branches;
+
+    add "\n| _ -> raise Not_found\n";
+    "and "
+  in
+
+  ignore (List.fold_left process_rule "let rec " tree.text_rules);
+
+  add " in\n"
+;;
+
+
+let count_lines s =
+  (* Returns the pair (n, c) where n is the number of line separators
+   * contained in s, and c is the number of characters following the last
+   * line separator (or the length of s if n = 0). The sequences CR LF,
+   * a single CR, and a single LF each count as one line separator.
+   *)
+  let l = String.length s in
+
+  let rec count n k no_cr no_lf =
+    (* n: number of separators found so far; k: position where the search
+     * continues; no_cr, no_lf: whether it is already known that there is
+     * no further CR or LF, respectively (this avoids useless searches).
+     *)
+    let next_cr =
+      if no_cr then
+        (-1)
+      else
+        try String.index_from s k '\013' with Not_found -> (-1) in
+    let next_lf =
+      if no_lf then
+        (-1)
+      else
+        try String.index_from s k '\010' with Not_found -> (-1) in
+    if next_cr >= 0 && (next_lf < 0 || next_cr < next_lf) then begin
+      (* A CR comes first. A directly following LF belongs to it. *)
+      if next_cr+1 < l && s.[next_cr+1] = '\010' then
+        count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+        count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      (* An LF comes first *)
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      (* No further separator *)
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+(* The position of the scanner in the input file. The scanning functions
+ * update the record after every token.
+ *)
+type scan_context =
+    { mutable old_line : int;     (* value of 'line' before the last token
+                                   * was scanned
+                                   *)
+      mutable old_column : int;   (* value of 'column' before the last token
+                                   * was scanned
+                                   *)
+      mutable line : int;         (* line after the last token *)
+      mutable column : int;       (* column after the last token *)
+    }
+;;
+
+
+let rec next_token context lexbuf =
+  (* Scans the next token with Lexer.scan_file and updates the position in
+   * 'context'. Space tokens are skipped. Code tokens get the position where
+   * the token began; the column is increased by 2 (presumably the width of
+   * the opening "{{" -- to be confirmed against the lexer). Fails at the
+   * end of the file.
+   *)
+  let t = Lexer.scan_file lexbuf in
+  let start_line = context.line
+  and start_column = context.column in
+  context.old_line <- start_line;
+  context.old_column <- start_column;
+  begin match count_lines (Lexing.lexeme lexbuf) with
+      (0, n_columns) ->
+        (* The token does not contain a line separator *)
+        context.column <- start_column + n_columns
+    | (n_lines, n_columns) ->
+        context.line <- start_line + n_lines;
+        context.column <- n_columns
+  end;
+  match t with
+      Space -> next_token context lexbuf
+    | Code(s,_,_) -> Code(s, start_line, start_column + 2)
+    | Eof -> failwith "Unexpected end of file"
+    | _ -> t
+;;
+
+
+let parse_and_generate ch =
+  (* Reads a grammar file from the channel ch and prints the generated
+   * text to stdout. The input has three parts: a header that is copied
+   * verbatim up to the first separator, the grammar text handled by
+   * Parser.text and process, and a trailer that is copied verbatim again.
+   * On any exception a message containing the position before the last
+   * scanned token is written to stderr, and the program exits with code 1.
+   *)
+  let b = Buffer.create 20000 in
+
+  (* Scans one token with Lexer.scan_header and moves the position stored
+   * in context past its lexeme; the position before the token is saved in
+   * old_line/old_column.
+   *)
+  let scan_verbatim context lexbuf =
+    let t = Lexer.scan_header lexbuf in
+    let line0 = context.line
+    and column0 = context.column in
+    context.old_line <- line0;
+    context.old_column <- column0;
+    begin match count_lines (Lexing.lexeme lexbuf) with
+        (0, n_columns) ->
+          context.column <- column0 + n_columns
+      | (n_lines, n_columns) ->
+          context.line <- line0 + n_lines;
+          context.column <- n_columns
+    end;
+    t
+  in
+
+  (* Copies code into the buffer until the separator has been read *)
+  let rec find_sep context lexbuf =
+    match scan_verbatim context lexbuf with
+        Code(s,_,_) ->
+          Buffer.add_string b s;
+          find_sep context lexbuf
+      | Separator -> ()
+      | Eof -> failwith "Unexpected end of file"
+      | _ -> assert false
+  in
+
+  (* Copies code into the buffer until the end of the input *)
+  let rec find_rest context lexbuf =
+    match scan_verbatim context lexbuf with
+        Code(s,_,_) ->
+          Buffer.add_string b s;
+          find_rest context lexbuf
+      | Eof -> ()
+      | _ -> assert false
+  in
+
+  let lexbuf = Lexing.from_channel ch in
+  let context = { old_line = 0; old_column = 0; line = 1; column = 0 } in
+  let file_name = "stdin" in
+  try
+    (* Header: *)
+    output_code_location b file_name ("", 1, 0);
+    find_sep context lexbuf;
+    (* Grammar text: parse it, then translate it into the buffer *)
+    let text = (Parser.text (next_token context) lexbuf : Ast.text) in
+    process b file_name text;
+    (* Trailer: *)
+    output_code_location b file_name ("", context.line, context.column);
+    find_rest context lexbuf;
+    (* Output everything: *)
+    print_string (Buffer.contents b)
+  with
+      any ->
+        Printf.eprintf
+          "Error at line %d column %d: %s\n"
+          context.old_line
+          context.old_column
+          (Printexc.to_string any);
+        exit 1
+;;
+
+
+(* Program entry point: translate the grammar read from standard input,
+ * then terminate with exit code 0 (parse_and_generate itself exits with
+ * code 1 when an error occurs).
+ *)
+parse_and_generate stdin;;
+exit 0;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/17 00:33:02 gerd
+ * Bugfix: tok* and tok? work now if tok is an untyped token
+ * without label.
+ *
+ * Revision 1.6 2000/05/14 20:59:24 gerd
+ * Added "phantasy line numbers" to help finding erroneous locations.
+ *
+ * Revision 1.5 2000/05/14 20:41:58 gerd
+ * x: Token? means: if Token is detected x=true else x=false.
+ * x: Token* means: x becomes the number of occurrences of Token.
+ *
+ * Revision 1.4 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2 2000/05/06 21:51:08 gerd
+ * Numerous bugfixes.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+{
+ open Parser
+}
+
+(* Scanner for the declaration and rule sections of a grammar file.
+ * Comments of the form /* ... */ and runs of white space both yield
+ * Space. Identifiers beginning with a lowercase letter yield Lname, those
+ * beginning with an uppercase letter yield Uname. A block enclosed in
+ * double braces yields Code carrying the text between the braces; the two
+ * integer components are always 0 here. Note that the question mark is
+ * returned as the token Error.
+ *)
+rule scan_file = parse
+ "/*" [^ '*']* ('*'+ [^ '/' '*'] [^ '*']* )* '*'* "*/"
+ { Space }
+ | "%token"
+ { Token }
+ | "<" [' ' '\t' '\r' '\n']* ">"
+ { Type
+ }
+ | [ 'a'-'z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+ { let s = Lexing.lexeme lexbuf in
+ Lname s
+ }
+ | [ 'A'-'Z' ] [ 'a'-'z' 'A'-'Z' '0'-'9' '_' ]*
+ { let s = Lexing.lexeme lexbuf in
+ Uname s
+ }
+ | "%%"
+ { Separator }
+ | "("
+ { Lparen }
+ | ","
+ { Comma }
+ | ")"
+ { Rparen }
+ | "["
+ { Lbracket }
+ | "]"
+ { Rbracket }
+ | ":"
+ { Colon }
+ | "{{" [^ '}']* ( '}' [^ '}']+ )* "}}"
+ { let s = Lexing.lexeme lexbuf in
+ Code (String.sub s 2 (String.length s - 4), 0, 0)
+ }
+ | "?"
+ { Error }
+ | "|"
+ { Alt }
+ | "+"
+ { Loop_plus }
+ | "*"
+ { Loop_star }
+ | [' ' '\t' '\r' '\n']+
+ { Space }
+ | "$"
+ { Dollar }
+ | eof
+ { Eof }
+
+(* Scanner for text that is copied verbatim: a doubled percent sign
+ * yields Separator, a single percent sign yields a Code token containing
+ * just that character, and any run of other characters yields a Code
+ * token containing the run. The integer components of Code are always 0.
+ *)
+and scan_header = parse
+ "%%"
+ { Separator }
+ | "%"
+ { Code("%", 0, 0) }
+ | [^ '%']*
+ { Code(Lexing.lexeme lexbuf, 0, 0) }
+ | eof
+ { Eof }
+
+(* Returns an arbitrarily long run of characters as one Code token whose
+ * integer components are 0, or Eof at the end of the input.
+ * NOTE(review): the first pattern also matches the empty string; confirm
+ * how callers of this rule detect the end of the input.
+ *)
+and scan_rest = parse
+ _*
+ { Code(Lexing.lexeme lexbuf, 0, 0) }
+ | eof
+ { Eof }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.2 2000/05/06 21:51:24 gerd
+ * New symbol Dollar.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+/* $Id$
+ * ----------------------------------------------------------------------
+ *
+ */
+
+%{
+ open Ast
+
+%}
+
+/* Tokens delivered by the lexer. Lname and Uname carry the identifier
+ * text; Code carries a string and two integers. All other tokens carry
+ * no value.
+ */
+%token Space
+%token Token
+%token Type
+%token <string> Lname
+%token <string> Uname
+%token Separator
+%token Lparen
+%token Rparen
+%token Comma
+%token Colon
+%token <string * int * int> Code
+%token Error
+%token Alt
+%token Loop_plus
+%token Loop_star
+%token Dollar
+%token Lbracket
+%token Rbracket
+%token Eof
+
+%start text
+%type <Ast.text> text
+
+%%
+
+/* A grammar text: the declarations followed by the rules. */
+text:
+ declarations rules
+ { { text_decls = $1; text_rules = $2; } }
+
+/* Zero or more declarations, terminated by a Separator. */
+declarations:
+ declaration declarations
+ { $1 :: $2 }
+| Separator
+ { [] }
+
+/* Token Uname declares an untyped token (D_token); with the Type token
+ * in between, a typed token (D_typed_token).
+ */
+declaration:
+ Token Uname
+ { D_token $2 }
+| Token Type Uname
+ { D_typed_token $3 }
+
+/* Zero or more rules, terminated by a Separator. */
+rules:
+ rule rules
+ { $1 :: $2 }
+| Separator
+ { [] }
+
+/* A rule: name, opening parenthesis, formal arguments, colon, branches.
+ * The closing parenthesis is consumed by formal_arguments.
+ */
+rule:
+ Lname Lparen formal_arguments Colon branches
+ { { rule_name = $1;
+ rule_arguments = $3;
+ rule_branches = $5;
+ }
+ }
+
+/* Possibly empty, comma-separated list of Lname up to and including the
+ * closing Rparen.
+ */
+formal_arguments:
+ Rparen
+ { [] }
+| Lname comma_formal_arguments
+ { $1 :: $2 }
+
+/* Remainder of a non-empty formal argument list. */
+comma_formal_arguments:
+ Comma Lname comma_formal_arguments
+ { $2 :: $3 }
+| Rparen
+ { [] }
+
+/* One or more branches separated by Alt. */
+branches:
+ branch alt_branches
+ { $1 :: $2 }
+
+alt_branches:
+ Alt branch alt_branches
+ { $2 :: $3 }
+|
+ { [] }
+
+/* A branch may be preceded by Dollar Code; that code is stored as
+ * branch_early_code of the branch.
+ */
+branch:
+ simple_branch
+ { $1 }
+| Dollar Code simple_branch
+ { { $3 with branch_early_code = $2 } }
+
+/* The first symbol is the selector of the branch. It may be followed by
+ * Dollar Code (stored as branch_binding_code), then by further patterns,
+ * the result code, and an optional error handler.
+ */
+simple_branch:
+ symbol Dollar Code patterns Code opt_error_handler
+ { { branch_selector = $1;
+ branch_early_code = ("",0,0);
+ branch_binding_code = $3;
+ branch_pattern = $4;
+ branch_result_code = $5;
+ branch_error_code = $6;
+ }
+ }
+| symbol patterns Code opt_error_handler
+ { { branch_selector = $1;
+ branch_early_code = ("",0,0);
+ branch_binding_code = ("", 0, 0);
+ branch_pattern = $2;
+ branch_result_code = $3;
+ branch_error_code = $4;
+ }
+ }
+
+/* Possibly empty sequence of patterns. */
+patterns:
+ pattern patterns
+ { $1 :: $2 }
+|
+ { [] }
+
+/* A symbol followed by Loop_star is a Repetition, followed by the Error
+ * token (the question mark) an Option, and otherwise Exact.
+ */
+pattern:
+ symbol Loop_star
+ { { pat_symbol = $1;
+ pat_modifier = Repetition;
+ }
+ }
+| symbol Error
+ { { pat_symbol = $1;
+ pat_modifier = Option;
+ }
+ }
+| symbol
+ { { pat_symbol = $1;
+ pat_modifier = Exact;
+ }
+ }
+
+/* A symbol is a token (U_symbol), a call of a rule with arguments
+ * (L_symbol), or a call through a bracketed name (L_indirect). Each form
+ * exists with a label, written as Lname Colon in front of it, and
+ * without one.
+ */
+symbol:
+ Lname Colon Uname
+ { U_symbol($3, Some $1) }
+| Lname Colon Lname Lparen actual_arguments
+ { L_symbol($3, $5, Some $1) }
+| Lname Colon Lbracket Lname Rbracket Lparen actual_arguments
+ { L_indirect($4, $7, Some $1) }
+| Uname
+ { U_symbol($1, None) }
+| Lname Lparen actual_arguments
+ { L_symbol($1, $3, None) }
+| Lbracket Lname Rbracket Lparen actual_arguments
+ { L_indirect($2, $5, None) }
+
+
+/* Possibly empty, comma-separated list of Lname up to and including the
+ * closing Rparen.
+ */
+actual_arguments:
+ Rparen
+ { [] }
+| Lname comma_actual_arguments
+ { $1 :: $2 }
+
+/* Remainder of a non-empty actual argument list. */
+comma_actual_arguments:
+ Rparen
+ { [] }
+| Comma Lname comma_actual_arguments
+ { $2 :: $3 }
+
+/* Optional error handler: the Error token followed by Code. */
+opt_error_handler:
+ Error Code
+ { Some $2 }
+|
+ { None }
+
+%%
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/09 00:03:22 gerd
+ * Added [ ml_name ] symbols, where ml_name is an arbitrary
+ * OCaml identifier.
+ *
+ * Revision 1.3 2000/05/08 22:03:01 gerd
+ * It is now possible to have a $ {{ }} sequence right BEFORE
+ * the first token. This code is executed just after the first token
+ * has been recognized.
+ *
+ * Revision 1.2 2000/05/06 21:51:46 gerd
+ * New Dollar tag.
+ *
+ * Revision 1.1 2000/05/06 17:36:17 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+
+(* Example input for the parser generator.
+ * Header: the token type; C is the only token carrying a value.
+ *)
+type token =
+  A | B | C of int | EOF
+;;
+
+%%
+
+/* Token declarations; the angle brackets mark C as a typed token. */
+%token A
+%token B
+%token <> C
+%token EOF
+
+%%
+
+/* Binding code placed directly after the first symbol of a branch must
+ * be introduced by a dollar sign. The question mark at the end of the
+ * branch of r introduces its error handler.
+ */
+r():
+  one:s()
+    $ {{ }}
+  b:B
+  two:B?
+  three:s()
+    {{ prerr_endline ("Result: " ^ string_of_int three) }}
+? {{ prerr_endline ("ERROR: " ^ !yy_position) }}
+
+s():
+  A
+    $ {{ }}
+    {{ prerr_endline "A"; 0 }}
+| B
+    $ {{ }}
+    {{ prerr_endline "B"; 0 }}
+| n:C
+    $ {{ }}
+    {{ prerr_endline ("C: " ^ string_of_int n); n }}
+%%
+
+(* Driver: feeds a fixed token list to parse_r (presumably the function
+ * generated for rule r). current returns the token at the head of the
+ * list; next_token drops it and returns the following one.
+ *)
+let input = ref [ A; B; B; B; C 5; EOF ] in
+let current() = List.hd !input in
+let next_token () =
+  prerr_endline "get_next";
+  input := List.tl !input;
+  List.hd !input
+in
+parse_r current next_token
+;;
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ * Some auxiliary functions
+ *)
+
+(**********************************************************************)
+(* Lexing *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Netconversion
+
+let character enc warner k =
+  (* Returns the string that represents the code point k in the internal
+   * encoding enc.
+   * Raises WF_error if k is not admitted by the XML production Char:
+   * surrogates, 0xfffe, 0xffff, values above 0x10ffff, and all control
+   * characters below 32 except TAB (9), LF (10) and CR (13) are rejected.
+   * If k is admitted but makechar raises Not_found, a warning is issued
+   * through warner and the empty string is returned.
+   *)
+  assert (k>=0);
+  if (k >= 0xd800 && k < 0xe000) || (k >= 0xfffe && k <= 0xffff) || k > 0x10ffff
+     || (k < 9) || (k = 11) || (k = 12) || (k >= 14 && k <= 31)
+  then
+    raise (WF_error("Code point " ^ string_of_int k ^
+                    " outside the accepted range of code points"));
+
+  try
+    makechar (enc : rep_encoding :> encoding) k
+  with
+      Not_found ->
+        warner # warn ("Code point cannot be represented in internal encoding: "
+                       ^ string_of_int k);
+        ""
+;;
+
+
+let check_name warner name =
+  (* Issues a warning through warner if name begins with the letters
+   * x, m, l in any combination of upper and lower case.
+   *)
+  let has_reserved_prefix =
+    String.length name >= 3 &&
+    (match String.sub name 0 3 with
+         ("xml" | "xmL" | "xMl" | "xML" | "Xml" | "XmL" | "XMl" | "XML") ->
+           true
+       | _ ->
+           false)
+  in
+  if has_reserved_prefix then
+    warner # warn ("Name is reserved for future extensions: " ^ name)
+;;
+
+
+let tokens_of_content_string lexerset s =
+  (* Scans s with lexerset.scan_content_string and returns all tokens
+   * found before Eof, in the order in which they occur.
+   *)
+  let lexbuf = Lexing.from_string s in
+  let rec collect acc =
+    match lexerset.scan_content_string lexbuf with
+        Eof -> List.rev acc
+      | tok -> collect (tok :: acc)
+  in
+  collect []
+;;
+
+
+let rec expand_attvalue_with_rec_check lexerset dtd s warner entities norm_crlf =
+  (* Recursively expands general entities and character entities in the
+   * attribute value s and returns the expanded string.
+   *
+   * entities: the names of the entities whose replacement text is
+   *   currently being expanded; a reference to one of them is a
+   *   WF_error (recursive reference).
+   * norm_crlf: if true, a CRLF token is replaced by one space; if
+   *   false, by two spaces. Replacement texts of entities are always
+   *   expanded with norm_crlf = false.
+   *
+   * Raises Validation_error if a referenced entity is declared
+   * externally although the DTD has a standalone declaration, or if the
+   * replacement text contains references to external entities.
+   * Raises WF_error if the value contains the character '<' literally.
+   *)
+  let toklist = tokens_of_content_string lexerset s in
+  let rec expand tl =
+    match tl with
+        [] -> ""
+      | ERef n :: tl' ->
+          if List.mem n entities then
+            raise(WF_error("Recursive reference to general entity `" ^ n ^ "'"));
+          let en, extdecl = dtd # gen_entity n in
+          if dtd # standalone_declaration && extdecl then
+            raise(Validation_error("Reference to entity `" ^ n ^
+                                   "' violates standalone declaration"));
+          let rtext, rtext_contains_ext_refs = en # replacement_text in
+          if rtext_contains_ext_refs then
+            raise(Validation_error("Found reference to external entity in attribute value"));
+          expand_attvalue_with_rec_check
+            lexerset dtd rtext warner (n :: entities) false ^ expand tl'
+      | CRef(-1) :: tl' ->
+          (* NOTE(review): CRef(-1) appears to be the lexer's marker for a
+           * literal CR LF sequence -- confirm against the lexer. Counted
+           * as one character it becomes one space, otherwise each of the
+           * two characters becomes a space of its own.
+           *)
+          if norm_crlf then
+            " " ^ expand tl'
+          else
+            "  " ^ expand tl'
+      | CRef n :: tl' ->
+          character lexerset.lex_encoding warner n ^ expand tl'
+      | CharData "<" :: _ ->
+          raise
+            (WF_error
+               ("Attribute value contains character '<' literally"))
+      | CharData x :: tl' ->
+          x ^ expand tl'
+      | _ -> assert false
+  in
+  expand toklist
+;;
+
+
+let expand_attvalue lexerset dtd s warner norm_crlf =
+  (* Expands the attribute value s; initially no entity is being expanded.
+   * norm_crlf: whether the sequence CRLF is recognized as one character
+   * or not (i.e. two characters)
+   *)
+  let entities_being_expanded = [] in
+  expand_attvalue_with_rec_check
+    lexerset dtd s warner entities_being_expanded norm_crlf
+;;
+
+
+let count_lines s =
+  (* Returns the pair (n, c): n is the number of line separators found in
+   * s, and c is the number of characters following the last separator
+   * (the whole length of s if there is none). CR, LF, and the sequence
+   * CR LF each count as exactly one separator.
+   *)
+  let l = String.length s in
+
+  (* n: separators counted so far; k: position where the search resumes;
+   * no_cr / no_lf: set once an earlier search found no CR / LF at all,
+   * so that the string is not searched again for that character.
+   *)
+  let rec count n k no_cr no_lf =
+    let next_cr =
+      if no_cr then
+        (-1)
+      else
+        try String.index_from s k '\013' with Not_found -> (-1) in
+    let next_lf =
+      if no_lf then
+        (-1)
+      else
+        try String.index_from s k '\010' with Not_found -> (-1) in
+    if next_cr >= 0 && (next_lf < 0 || next_cr < next_lf) then begin
+      (* CR comes first; an LF directly behind it belongs to the same
+       * separator and is skipped together with it.
+       *)
+      if next_cr+1 < l && s.[next_cr+1] = '\010' then
+        count (n+1) (next_cr+2) false (next_lf < 0)
+      else
+        count (n+1) (next_cr+1) false (next_lf < 0)
+    end
+    else if next_lf >= 0 then begin
+      count (n+1) (next_lf+1) (next_cr < 0) false
+    end
+    else
+      n, (l - k)
+
+  in
+  count 0 0 false false
+;;
+
+
+let tokens_of_xml_pi lexers s =
+  (* Scans s, extended by one trailing space, with lexers.scan_xml_pi and
+   * returns all tokens found before Pro_eof, in order of occurrence.
+   *)
+  let lexbuf = Lexing.from_string (s ^ " ") in
+  let rec collect acc =
+    match lexers.scan_xml_pi lexbuf with
+        Pro_eof -> List.rev acc
+      | t -> collect (t :: acc)
+  in
+  collect []
+;;
+
+
+let decode_xml_pi pl =
+  (* pl must be a sequence of the token triples Pro_name, Pro_eq,
+   * Pro_string. The result is the list of the (name, value) pairs in
+   * the same order; the values are returned as they are.
+   * Raises WF_error if pl has any other shape.
+   *)
+  let rec gather acc rest =
+    match rest with
+        [] ->
+          List.rev acc
+      | Pro_name name :: Pro_eq :: Pro_string value :: rest' ->
+          gather ((name, value) :: acc) rest'
+      | _ ->
+          raise (WF_error("Bad XML processing instruction"))
+  in
+  gather [] pl
+;;
+
+
+let decode_doc_xml_pi pl =
+  (* Interprets the name/value pairs of an XML declaration and returns
+   * the triple (version, encoding option, standalone option). "version"
+   * must come first; "encoding" and "standalone" are optional but must
+   * occur in this order. Anything else raises WF_error.
+   *)
+  let bad() = raise(WF_error("Bad XML declaration")) in
+  match pl with
+      ("version", v) :: rest ->
+        begin match rest with
+            [] -> (v, None, None)
+          | [ "encoding", e ] -> (v, Some e, None)
+          | [ "standalone", s ] -> (v, None, Some s)
+          | [ "encoding", e; "standalone", s ] -> (v, Some e, Some s)
+          | _ -> bad()
+        end
+    | _ ->
+        bad()
+;;
+
+
+let check_text_xml_pi pl =
+  (* Accepts a declaration consisting of "encoding" alone or of "version"
+   * followed by "encoding"; every other list raises WF_error.
+   *)
+  match pl with
+      [ "encoding", _ ]
+    | [ "version", _; "encoding", _ ] ->
+        ()
+    | _ ->
+        raise(WF_error("Bad XML declaration"))
+;;
+
+
+let check_version_num s =
+  (* Raises WF_error unless every character of s is an ASCII letter, a
+   * digit, or one of - _ . :
+   *)
+  String.iter
+    (fun c ->
+       match c with
+           ('a'..'z'|'A'..'Z'|'0'..'9'|
+            '-'|'_'|'.'|':') -> ()
+         | _ ->
+             raise(WF_error("Bad XML version string")))
+    s
+;;
+
+
+let check_public_id s =
+  (* Raises WF_error if s contains a character other than space, CR, LF,
+   * ASCII letters, digits, and the punctuation characters listed below.
+   *)
+  String.iter
+    (fun c ->
+       match c with
+           (' '|'\013'|'\010'|'a'..'z'|'A'..'Z'|'0'..'9'|
+            '-'|'\''|'('|')'|'+'|','|'.'|'/'|':'|'='|'?'|
+            ';'|'!'|'*'|'#'|'@'|'$'|'_'|'%') -> ()
+         | _ ->
+             raise(WF_error("Illegal character in PUBLIC identifier")))
+    s
+;;
+
+
+(**********************************************************************)
+(* list functions *)
+
+
+let rec check_dups l =
+  (* Returns true iff some element occurs more than once in l
+   * (structural equality).
+   *)
+  match l with
+      [] -> false
+    | c :: rest -> List.mem c rest || check_dups rest
+;;
+
+
+let count pred l =
+  (* Returns the number of elements of l that satisfy pred. *)
+  List.fold_left
+    (fun n x -> if pred x then n + 1 else n)
+    0
+    l
+;;
+
+
+(**********************************************************************)
+(* attributes *)
+
+let check_attribute_value_lexically lexerset x t v =
+  (* Forces the lazy value x (and raises the result) if the attribute
+   * value v does not have the lexical form required by the attribute
+   * type t. v is scanned with lexerset.scan_name_string; Ignore tokens
+   * are dropped. The remaining tokens must be:
+   * - A_id, A_idref, A_entity, A_notation _: exactly one Name
+   * - A_idrefs, A_entities:                  only Name tokens
+   * - A_nmtoken, A_enum _:                   exactly one Name or Nametoken
+   * - A_nmtokens:                            only Name or Nametoken tokens
+   * Values of any other type (A_cdata) are not checked.
+   *)
+  let lexbuf = Lexing.from_string v in
+  let rec scan acc =
+    match lexerset.scan_name_string lexbuf with
+        Eof    -> List.rev acc
+      | Ignore -> scan acc
+      | tok    -> scan (tok :: acc)
+  in
+  let toks = scan [] in
+  let reject() = raise (Lazy.force x) in
+  let is_name tok =
+    (match tok with Name _ -> true | _ -> false) in
+  let is_name_or_nmtoken tok =
+    (match tok with (Name _ | Nametoken _) -> true | _ -> false) in
+  match t with
+      (A_id | A_idref | A_entity | A_notation _) ->
+        begin match toks with
+            [ Name _ ] -> ()
+          | _ -> reject()
+        end
+    | (A_idrefs | A_entities) ->
+        if not (List.for_all is_name toks) then reject()
+    | (A_nmtoken | A_enum _) ->
+        begin match toks with
+            [ (Name _ | Nametoken _) ] -> ()
+          | _ -> reject()
+        end
+    | A_nmtokens ->
+        if not (List.for_all is_name_or_nmtoken toks) then reject()
+    | _ -> ()
+;;
+
+
+let split_attribute_value lexerset v =
+  (* Returns the names and nmtokens contained in v as a list of strings,
+   * in order of occurrence. Ignore tokens between them are dropped and
+   * not returned. Any other token raises Validation_error.
+   *)
+  let lexbuf = Lexing.from_string v in
+  let rec gather acc =
+    match lexerset.scan_name_string lexbuf with
+        Eof -> List.rev acc
+      | Ignore -> gather acc
+      | (Name s | Nametoken s) -> gather (s :: acc)
+      | _ -> raise(Validation_error("Illegal attribute value"))
+  in
+  gather []
+;;
+
+
+let normalize_line_separators lexerset s =
+  (* Scans s with lexerset.scan_for_crlf and returns the concatenation of
+   * the CharData pieces delivered by that scanner. The rewriting of the
+   * line separators is presumably done by the scanner itself.
+   *)
+  let lexbuf = Lexing.from_string s in
+  let pieces = Buffer.create (String.length s) in
+  let rec pump () =
+    match lexerset.scan_for_crlf lexbuf with
+        Eof ->
+          Buffer.contents pieces
+      | CharData piece ->
+          Buffer.add_string pieces piece;
+          pump ()
+      | _ ->
+          assert false
+  in
+  pump ()
+;;
+
+
+let value_of_attribute lexerset dtd n atype v =
+ (* The attribute with name 'n', type 'atype' and string value 'v' is
+ * decomposed, and the att_value is returned:
+ * - It is checked whether 'v' conforms to the lexical rules for attributes
+ * of type 'atype'
+ * - If 'atype <> A_cdata', leading and trailing spaces are removed from 'v'.
+ * - If 'atype = A_notation d', it is checked if 'v' matches one of the
+ * notation names contained in d.
+ * - If 'atype = A_enum d', it is checked whether 'v' matches one of the
+ * tokens from d
+ * - If 'atype' refers to a "single-value" type, the value is returned as
+ * Value u, where u is the normalized value. If 'atype' refers to a
+ * "list" type, the value is returned as Valuelist l, where l contains
+ * the tokens.
+ *
+ * Note that this function does not implement all normalization rules.
+ * It is expected that the string passed as 'v' is already preprocessed;
+ * i.e. character and entity references are resolved, and the substitution
+ * of white space characters by space characters has already been performed.
+ * If these requirements are met, the value returned by this function
+ * will be perfectly normalized.
+ *
+ * Further checks:
+ * - ENTITY and ENTITIES values: It is checked whether there is an
+ * unparsed general entity
+ * [ Other checks planned: ID, IDREF, IDREFS but not yet implemented ]
+ *
+ * All violations are reported as Validation_error.
+ *)
+
+ (* The exception is wrapped into a lazy value so that it is only
+ * constructed and raised when the lexical check forces it.
+ *)
+ let lexical_error() =
+ lazy (raise(Validation_error("Attribute `" ^ n ^ "' is lexically malformed"))) in
+
+ let remove_leading_and_trailing_spaces u =
+ (* Precondition: 'u' matches <name> or <nmtoken> *)
+ match split_attribute_value lexerset u with
+ [ u' ] -> u'
+ | _ -> assert false
+ in
+
+ (* Checks that 'u' names an NDATA entity whose use does not contradict
+ * a standalone declaration of the DTD.
+ *)
+ let check_ndata_entity u =
+ let en, extdecl = dtd # gen_entity u in (* or Validation_error *)
+ if not (en # is_ndata) then
+ raise(Validation_error("Reference to entity `" ^ u ^
+ "': NDATA entity expected"));
+ if dtd # standalone_declaration && extdecl then
+ raise(Validation_error("Reference to entity `" ^ u ^
+ "' violates standalone declaration"));
+ in
+
+ match atype with
+ A_cdata ->
+ (* CDATA values are returned unchanged and unchecked *)
+ Value v
+
+ | (A_id | A_idref | A_nmtoken) ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ Value (remove_leading_and_trailing_spaces v)
+ | A_entity ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ check_ndata_entity v';
+ Value v'
+
+ | (A_idrefs | A_nmtokens) ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ Valuelist (split_attribute_value lexerset v)
+
+ | A_entities ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let l = split_attribute_value lexerset v in
+ List.iter check_ndata_entity l;
+ Valuelist l
+
+ | A_notation nl ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ if not (List.mem v' nl) then
+ raise(Validation_error
+ ("Attribute `" ^ n ^
+ "' does not match one of the declared notation names"));
+ Value v'
+
+ | A_enum enuml ->
+ check_attribute_value_lexically lexerset (lexical_error()) atype v;
+ let v' = remove_leading_and_trailing_spaces v in
+ if not (List.mem v' enuml) then
+ raise(Validation_error
+ ("Attribute `" ^ n ^
+ "' does not match one of the declared enumerator tokens"));
+ Value v'
+;;
+
+
+let normalization_changes_value lexerset atype v =
+  (* Tells whether normalizing the string value v of an attribute of type
+   * atype would change it:
+   * - A_cdata: never.
+   * - "single-value" types: iff v begins or ends with a space.
+   * - "list" types: iff v differs from its tokens joined by exactly one
+   *   space each, i.e. there are leading, trailing, or superfluous
+   *   separating spaces.
+   *
+   * Note: It is assumed that TABs, CRs, and LFs in v are already
+   * converted to spaces. The checks work for both ISO-8859-1 and UTF-8.
+   *)
+  match atype with
+      A_cdata ->
+        false
+
+    | (A_id | A_idref | A_entity | A_nmtoken | A_notation _ | A_enum _) ->
+        let len = String.length v in
+        len > 0 && (v.[0] = ' ' || v.[len - 1] = ' ')
+
+    | (A_idrefs | A_entities | A_nmtokens) ->
+        let normal_form =
+          String.concat " " (split_attribute_value lexerset v) in
+        normal_form <> v
+;;
+
+
+(**********************************************************************)
+
+let write_markup_string ~(from_enc:rep_encoding) ~to_enc os s =
+  (* Writes the from_enc-encoded string s to os, recoded to to_enc if the
+   * two encodings differ. No character is paraphrased; a code point that
+   * cannot be represented in to_enc causes a Failure.
+   *)
+  let src_enc = (from_enc :> encoding) in
+  let cannot_represent n =
+    failwith
+      ("Pxp_aux.write_markup_string: Cannot represent " ^
+       "code point " ^ string_of_int n)
+  in
+  let recoded =
+    if to_enc = src_enc then
+      s
+    else
+      recode_string
+        ~in_enc:src_enc
+        ~out_enc:to_enc
+        ~subst:cannot_represent
+        s
+  in
+  write os recoded 0 (String.length recoded)
+;;
+
+
+let write_data_string ~(from_enc:rep_encoding) ~to_enc os content =
+  (* Writes the from_enc-encoded string content as to_enc-encoded string
+   * to os. The characters '&', '<', '>', '"', '%' are replaced by the
+   * references &amp; &lt; &gt; &quot; &#37; and every character that
+   * cannot be represented in to_enc is replaced by a numeric character
+   * reference.
+   *)
+  let convert_ascii s =
+    (* Convert the ASCII-encoded string 's'. Note that 'from_enc' is
+     * always ASCII-compatible
+     *)
+    if to_enc = (from_enc :> encoding)
+    then s
+    else
+      recode_string
+        ~in_enc:(from_enc :> encoding)
+        ~out_enc:to_enc
+        ~subst:(fun n -> assert false)
+        s
+  in
+
+  let write_ascii s =
+    (* Write the ASCII-encoded string 's' *)
+    let s' = convert_ascii s in
+    write os s' 0 (String.length s')
+  in
+
+  let write_part j l =
+    (* Writes the substring of 'content' beginning at pos 'j' with
+     * length 'l'
+     *)
+    if to_enc = (from_enc :> encoding) then
+      write os content j l
+    else begin
+      let s' = recode_string
+                 ~in_enc:(from_enc :> encoding)
+                 ~out_enc:to_enc
+                 ~subst:(fun n ->
+                           convert_ascii ("&#" ^ string_of_int n ^ ";"))
+                 (String.sub content j l)
+      in
+      write os s' 0 (String.length s')
+    end
+  in
+
+  (* i: start of the pending part of content that has not been written
+   * yet and contains none of the special characters.
+   *)
+  let i = ref 0 in
+  for k = 0 to String.length content - 1 do
+    match content.[k] with
+        ('&' | '<' | '>' | '"' | '%') as c ->
+          if !i < k then
+            write_part !i (k - !i);
+          begin match c with
+              '&' -> write_ascii "&amp;"
+            | '<' -> write_ascii "&lt;"
+            | '>' -> write_ascii "&gt;"
+            | '"' -> write_ascii "&quot;"
+            | '%' -> write_ascii "&#37;"  (* reserved in DTDs *)
+            | _   -> assert false
+          end;
+          i := k+1
+      | _ -> ()
+  done;
+  if !i < String.length content then
+    write_part !i (String.length content - !i)
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.5 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.4 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.3 2000/07/16 16:33:57 gerd
+ * New function write_markup_string: Handles the encoding
+ * of the string.
+ *
+ * Revision 1.2 2000/07/08 22:15:45 gerd
+ * [Merging 0.2.10:] write_data_string: The character '%' is special, too.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_aux.ml:
+ *
+ * Revision 1.12 2000/05/27 19:08:30 gerd
+ * Added functionality to check standalone declaration:
+ *
+ * expand_attvalue: Checks whether included entities violate the
+ * stand-alone declaration.
+ *
+ * value_of_attribute: Checks whether ENTITY/ENTITIES values violate
+ * this declaration. (Furthermore, it is checked whether the NDATA
+ * entity exists - this has been forgotten in previous versions.)
+ *
+ * value_of_attribute/check_attribute_value_lexically: improved.
+ *
+ * New function normalization_changes_value: helps detecting
+ * one case which violates the standalone declaration.
+ *
+ * Revision 1.11 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.10 2000/05/01 20:41:56 gerd
+ * New function write_data_string.
+ *
+ * Revision 1.9 2000/04/30 18:11:31 gerd
+ * New function normalize_line_separators.
+ * In function expand_attvalue: New argument norm_crlf. If the attvalue
+ * is read directly from a file, the sequence CR LF must be converted to a
+ * single space. If the attvalue is read from a replacement text, CR LF has
+ * already been converted to a single LF, and CR LF, if still occurring, must be
+ * converted to two spaces. The caller can indicate the case by passing
+ * true/false as norm_crlf.
+ *
+ * Revision 1.8 1999/09/01 22:51:07 gerd
+ * Added functions.
+ * 'character' raises Illegal_character if characters are found that
+ * do not match the production Char.
+ *
+ * Revision 1.7 1999/09/01 16:17:37 gerd
+ * Added function 'check_name'.
+ *
+ * Revision 1.6 1999/08/15 20:33:19 gerd
+ * Added: a function that checks public identifiers. Only certain
+ * characters may occur in these identifiers.
+ * Control characters are rejected by the "character" function.
+ * Bugfix: recursive entity references are detected in attribute
+ * expansion
+ *
+ * Revision 1.5 1999/08/15 02:18:02 gerd
+ * That '<' is not allowed in attribute values, is a violation
+ * of well-formedness, not of the validity; so WF_error is raised.
+ *
+ * Revision 1.4 1999/08/15 00:20:37 gerd
+ * When expanding attribute values, references to parameter
+ * entities are now resolved by the method "replacement_text" which
+ * has an additional return value, and no longer by "attlist_replacement_text".
+ * The new return value indicates whether references to external entities
+ * have been resolved (directly or indirectly); this is allowed at some
+ * locations but not in attribute values.
+ *
+ * Revision 1.3 1999/08/14 22:05:53 gerd
+ * Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ *
+ * Revision 1.2 1999/08/10 21:35:06 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+open Pxp_types
+
+let write_expr_ext_id out extid =
+  (* Writes an O'Caml expression to out that evaluates to extid; string
+   * components are written as escaped string literals.
+   *)
+  let expr =
+    match extid with
+        System s ->
+          "(Pxp_types.System\"" ^ String.escaped s ^ "\")"
+      | Public(s,t) ->
+          "(Pxp_types.Public(\"" ^ String.escaped s ^
+          "\",\"" ^
+          String.escaped t ^ "\"))"
+      | Anonymous ->
+          "Pxp_types.Anonymous"
+  in
+  output_string out expr
+;;
+
+
+let rec write_expr_content_model out cm =
+  (* Writes an O'Caml expression to out that evaluates to the content
+   * model cm. List elements are each followed by "; ".
+   *)
+  let emit = output_string out in
+  match cm with
+      Unspecified ->
+        emit "Pxp_types.Unspecified"
+    | Empty ->
+        emit "Pxp_types.Empty"
+    | Any ->
+        emit "Pxp_types.Any"
+    | Mixed msl ->
+        emit "(Pxp_types.Mixed [";
+        List.iter
+          (fun ms -> write_expr_mixed_spec out ms; emit "; ")
+          msl;
+        emit "])"
+    | Regexp re ->
+        emit "(Pxp_types.Regexp ";
+        write_expr_regexp_spec out re;
+        emit ")"
+
+and write_expr_mixed_spec out ms =
+  (* Writes an O'Caml expression to out that evaluates to ms *)
+  let expr =
+    match ms with
+        MPCDATA ->
+          "Pxp_types.MPCDATA"
+      | MChild s ->
+          "(Pxp_types.MChild \"" ^
+          String.escaped s ^ "\")"
+  in
+  output_string out expr
+
+and write_expr_regexp_spec out re =
+  (* Writes an O'Caml expression to out that evaluates to the regular
+   * expression re. List elements are each followed by "; ".
+   *)
+  let emit = output_string out in
+  (* constructor applied to a single subexpression *)
+  let wrap_one opening sub =
+    emit opening;
+    write_expr_regexp_spec out sub;
+    emit ")"
+  in
+  (* constructor applied to a list of subexpressions *)
+  let wrap_list opening subs =
+    emit opening;
+    List.iter
+      (fun sub -> write_expr_regexp_spec out sub; emit "; ")
+      subs;
+    emit "])"
+  in
+  match re with
+      Optional re'  -> wrap_one "(Pxp_types.Optional " re'
+    | Repeated re'  -> wrap_one "(Pxp_types.Repeated " re'
+    | Repeated1 re' -> wrap_one "(Pxp_types.Repeated1 " re'
+    | Alt rel       -> wrap_list "(Pxp_types.Alt [" rel
+    | Seq rel       -> wrap_list "(Pxp_types.Seq [" rel
+    | Child s       -> emit ("(Pxp_types.Child \"" ^
+                             String.escaped s ^ "\")")
+;;
+
+
+let write_expr_att_type out at =
+  (* Writes an O'Caml expression to out that evaluates to the attribute
+   * type at. The strings of A_notation and A_enum are written as escaped
+   * string literals, each followed by "; ".
+   *)
+  let emit = output_string out in
+  let emit_string_list opening sl =
+    emit opening;
+    List.iter
+      (fun s -> emit ("\"" ^ String.escaped s ^ "\"; "))
+      sl;
+    emit "])"
+  in
+  match at with
+      A_cdata       -> emit "Pxp_types.A_cdata"
+    | A_id          -> emit "Pxp_types.A_id"
+    | A_idref       -> emit "Pxp_types.A_idref"
+    | A_idrefs      -> emit "Pxp_types.A_idrefs"
+    | A_entity      -> emit "Pxp_types.A_entity"
+    | A_entities    -> emit "Pxp_types.A_entities"
+    | A_nmtoken     -> emit "Pxp_types.A_nmtoken"
+    | A_nmtokens    -> emit "Pxp_types.A_nmtokens"
+    | A_notation sl -> emit_string_list "(Pxp_types.A_notation [" sl
+    | A_enum sl     -> emit_string_list "(Pxp_types.A_enum [" sl
+;;
+
+
+let write_expr_att_default out ad =
+  (* Writes to 'out' the O'Caml expression that rebuilds the attribute
+   * default declaration 'ad'.
+   *)
+  let with_string opening s = opening ^ String.escaped s ^ "\")" in
+  let text =
+    match ad with
+        D_required  -> "Pxp_types.D_required"
+      | D_implied   -> "Pxp_types.D_implied"
+      | D_default s -> with_string "(Pxp_types.D_default \"" s
+      | D_fixed s   -> with_string "(Pxp_types.D_fixed \"" s
+  in
+  output_string out text
+;;
+
+
+let write_expr_att_value out av =
+  (* Writes to 'out' the O'Caml expression that rebuilds the attribute
+   * value 'av'.
+   *)
+  let put = output_string out in
+  match av with
+      Implied_value ->
+        put "Pxp_types.Implied_value"
+    | Value s ->
+        put ("(Pxp_types.Value \"" ^ String.escaped s ^ "\")")
+    | Valuelist items ->
+        put "(Pxp_types.Valuelist [";
+        List.iter
+          (fun item -> put ("\"" ^ String.escaped item ^ "\"; "))
+          items;
+        put "])"
+;;
+
+
+(* Returns the source text of the polymorphic variant tag naming the
+ * given internal encoding, so that it can be pasted into generated code.
+ *)
+let ocaml_encoding = function
+    `Enc_utf8     -> "`Enc_utf8"
+  | `Enc_utf16    -> "`Enc_utf16"
+  | `Enc_utf16_le -> "`Enc_utf16_le"
+  | `Enc_utf16_be -> "`Enc_utf16_be"
+  | `Enc_iso88591 -> "`Enc_iso88591"
+;;
+
+
+let write_expr_new_pi out pi =
+  (* Writes to 'out' an expression of the form
+   *   (new Pxp_dtd.proc_instruction "<target>" "<value>" <encoding>)
+   * built from the target, value and encoding of 'pi'.
+   *)
+  let pieces =
+    [ "(new Pxp_dtd.proc_instruction \"";
+      String.escaped (pi # target);
+      "\" \"";
+      String.escaped (pi # value);
+      "\" ";
+      ocaml_encoding (pi # encoding);
+      ")"
+    ] in
+  output_string out (String.concat "" pieces)
+;;
+
+
+let write_expr_node_type out nt =
+  (* Writes to 'out' the O'Caml expression that rebuilds the node type
+   * 'nt'. Only data, element, super root, processing instruction and
+   * comment nodes are supported; any other node type fails the assertion.
+   *)
+  let text =
+    match nt with
+        T_data       -> "Pxp_document.T_data"
+      | T_super_root -> "Pxp_document.T_super_root"
+      | T_comment    -> "Pxp_document.T_comment"
+      | T_element s  ->
+          "(Pxp_document.T_element \"" ^ String.escaped s ^ "\")"
+      | T_pinstr s   ->
+          "(Pxp_document.T_pinstr \"" ^ String.escaped s ^ "\")"
+      | _ -> assert false
+  in
+  output_string out text
+;;
+
+
+let write_local_dtd out (dtd : dtd) =
+  (* Outputs "let mkdtd warner = ... in" to 'out'.
+   *
+   * The generated function creates a fresh DTD object 'dtdobj' and then
+   * replays, in this order: the ID (if any), the standalone declaration,
+   * the notations, the unparsed (NDATA) general entities, the elements
+   * with their attributes, the processing instructions, the root element
+   * name (if any), and the allow/disallow-arbitrary flag. All name lists
+   * are sorted so that the output does not depend on their original order.
+   *)
+  output_string out "let mkdtd warner =\n";
+  output_string out ("let encoding = " ^ ocaml_encoding (dtd # encoding) ^
+                     " in\n");
+  output_string out "let dtdobj = new Pxp_dtd.dtd warner encoding in\n";
+
+  (* Set the ID. The "dtdobj # set_id " prefix is only written together
+   * with its argument: writing it unconditionally would leave a dangling
+   * method call in the generated code whenever the DTD has no ID.
+   *)
+  begin match dtd # id with
+      None -> ()
+    | Some(External x) ->
+        output_string out "dtdobj # set_id (Pxp_types.External ";
+        write_expr_ext_id out x;
+        output_string out ");\n"
+    | Some(Derived x) ->
+        output_string out "dtdobj # set_id (Pxp_types.Derived ";
+        write_expr_ext_id out x;
+        output_string out ");\n"
+    | Some Internal ->
+        output_string out "dtdobj # set_id Pxp_types.Internal;\n"
+  end;
+
+  (* Set standalone declaration: *)
+  output_string out ("dtdobj # set_standalone_declaration " ^
+                     string_of_bool (dtd # standalone_declaration) ^ ";\n");
+
+  (* Add notations: *)
+  List.iter
+    (fun noname ->
+       let no = dtd # notation noname in
+       output_string out ("let no = new Pxp_dtd.dtd_notation \"" ^
+                          String.escaped noname ^ "\" ");
+       write_expr_ext_id out (no # ext_id);
+       output_string out " encoding in\n";
+       output_string out "dtdobj # add_notation no;\n";
+    )
+    (List.sort Pervasives.compare (dtd # notation_names));
+
+  (* Add unparsed entities (parsed entities are not written): *)
+  List.iter
+    (fun enname ->
+       let en, _ = dtd # gen_entity enname in
+       if en # is_ndata then begin
+         let ext_id = en # ext_id in
+         let notation = en # notation in
+         let encoding = en # encoding in
+         output_string out ("let ndata = new Pxp_entity.ndata_entity \"" ^
+                            String.escaped enname ^ "\" ");
+         write_expr_ext_id out ext_id;
+         output_string out ("\"" ^ String.escaped notation ^ "\" " ^
+                            ocaml_encoding encoding ^ " in \n");
+         output_string out "dtdobj # add_gen_entity (ndata :> Pxp_entity.entity) false;\n";
+       end;
+    )
+    (List.sort Pervasives.compare (dtd # gen_entity_names));
+
+
+  (* Add elements: *)
+  List.iter
+    (fun elname ->
+       (* Create the element 'el': *)
+       let el = dtd # element elname in
+       output_string out ("let el = new Pxp_dtd.dtd_element dtdobj \"" ^
+                          String.escaped elname ^ "\" in\n");
+       output_string out "let cm = ";
+       write_expr_content_model out (el # content_model);
+       output_string out " in\n";
+       output_string out "el # set_cm_and_extdecl cm false;\n";
+       (* Add attributes: *)
+       List.iter
+         (fun attname ->
+            let atttype, attdefault = el # attribute attname in
+            output_string out ("el # add_attribute \"" ^
+                               String.escaped attname ^ "\" ");
+            write_expr_att_type out atttype;
+            output_string out " ";
+            write_expr_att_default out attdefault;
+            output_string out " false;\n";
+         )
+         (List.sort Pervasives.compare (el # attribute_names));
+
+       (* Allow arbitrary? *)
+       if el # arbitrary_allowed then
+         output_string out "el # allow_arbitrary;\n"
+       else
+         output_string out "el # disallow_arbitrary;\n";
+
+       (* Validate: *)
+       output_string out "el # validate;\n";
+
+       (* Add the element 'el' to 'dtdobj': *)
+       output_string out "dtdobj # add_element el;\n";
+    )
+    (List.sort Pervasives.compare (dtd # element_names));
+
+  (* Add processing instructions: *)
+  List.iter
+    (fun target ->
+       let pilist = dtd # pinstr target in
+       List.iter
+         (fun pi ->
+            output_string out "let pi = ";
+            write_expr_new_pi out pi;
+            output_string out " in\n";
+            output_string out "dtdobj # add_pinstr pi;\n";
+         )
+         pilist;
+    )
+    (List.sort Pervasives.compare (dtd # pinstr_names));
+
+  (* Set the name of the root element: *)
+  begin match dtd # root with
+      None -> ()
+    | Some rootname ->
+        output_string out ("dtdobj # set_root \"" ^
+                           String.escaped rootname ^ "\";\n")
+  end;
+
+  (* Special options: *)
+  if dtd # arbitrary_allowed then
+    output_string out "dtdobj # allow_arbitrary;\n"
+  else
+    output_string out "dtdobj # disallow_arbitrary;\n";
+
+  (* Return dtdobj: *)
+  output_string out "dtdobj in\n"
+;;
+
+
+let rec write_local_subtree out n =
+  (* Outputs the term generating the subtree rooted at 'n'.
+   *
+   * The term is a chain of "let ... in" bindings and statements that ends
+   * with the freshly created node 't'. It refers to the names 'spec' and
+   * 'dtd' and to the helper functions add_node, add_pinstr and
+   * local_validate, all of which must be bound where the term is inserted.
+   * Sub nodes are generated recursively as arguments of add_node.
+   *)
+  let put = output_string out in
+
+  (* Writes the "let pos = ... in" binding for the position of 'n': *)
+  let put_position () =
+    let loc, line, col = n # position in
+    put ("let pos = \"" ^ String.escaped loc ^ "\", " ^
+         string_of_int line ^ ", " ^
+         string_of_int col ^ " in\n")
+  in
+
+  (* Writes one element of the attribute list. List values are joined
+   * with single spaces; implied values are left out.
+   *)
+  let put_attribute (name, value) =
+    let text =
+      match value with
+          Value s       -> Some s
+        | Valuelist sl  -> Some (String.concat " " sl)
+        | Implied_value -> None
+    in
+    match text with
+        None -> ()
+      | Some s ->
+          put ("\"" ^ String.escaped name ^ "\", ");
+          put ("\"" ^ String.escaped s ^ "\"; ")
+  in
+
+  (* Writes the creation of one processing instruction and its
+   * registration at 't':
+   *)
+  let put_pinstr pi =
+    put "let pi = ";
+    write_expr_new_pi out pi;
+    put " in\n";
+    put "add_pinstr t pi;\n"
+  in
+
+  put "let nt = ";
+  write_expr_node_type out (n # node_type);
+  put " in\n";
+
+  (* Create the node 't' itself: *)
+  begin match n # node_type with
+      T_data ->
+        put ("let t = Pxp_document.create_data_node spec dtd \"" ^
+             String.escaped (n # data) ^ "\" in\n")
+    | T_element elname ->
+        put_position ();
+        put ("let t = Pxp_document.create_element_node ~position:pos spec dtd \"" ^
+             String.escaped elname ^ "\" [ ");
+        List.iter put_attribute (n # attributes);
+        put " ] in\n"
+    | T_super_root ->
+        put_position ();
+        put ("let t = Pxp_document.create_super_root_node ~position:pos spec dtd in\n")
+    | T_pinstr piname ->
+        put_position ();
+        put "let pi = ";
+        write_expr_new_pi out (List.hd (n # pinstr piname));
+        put " in\n";
+        put ("let t = Pxp_document.create_pinstr_node ~position:pos spec dtd pi in\n")
+    | T_comment ->
+        put_position ();
+        put "let comment = ";
+        begin match n # comment with
+            None   -> assert false
+          | Some c -> put ("\"" ^ String.escaped c ^ "\"")
+        end;
+        put " in\n";
+        put ("let t = Pxp_document.create_comment_node ~position:pos spec dtd comment in\n")
+    | _ ->
+        assert false
+  end;
+
+  (* Add processing instructions (a PI node already got its own PI): *)
+  begin match n # node_type with
+      T_pinstr _ ->
+        ()
+    | _ ->
+        List.iter
+          (fun target -> List.iter put_pinstr (n # pinstr target))
+          (List.sort Pervasives.compare (n # pinstr_names))
+  end;
+
+  (* Add the sub nodes: *)
+  n # iter_nodes
+    (fun child ->
+       put "add_node t (\n";
+       write_local_subtree out child;
+       put ");\n"
+    );
+
+  (* Validate: *)
+  put "local_validate t;\n";
+
+  (* Return: *)
+  put "t\n"
+;;
+
+
+let write_local_document out (d : 'ext document) =
+  (* Outputs "let mkdoc warner spec = ... in".
+   *
+   * The generated function creates a document object, sets its XML
+   * version, builds the DTD (via the generated mkdtd) and the node tree,
+   * installs the tree as root, and finally re-adds the processing
+   * instructions of the document, sorted by target.
+   *)
+  let put = output_string out in
+
+  (* Writes the creation of one PI and its registration at 'doc': *)
+  let put_pinstr pi =
+    put "let pi = ";
+    write_expr_new_pi out pi;
+    put " in\n";
+    put "doc # add_pinstr pi;\n"
+  in
+
+  put "let mkdoc warner spec =\n";
+  put "let doc = new Pxp_document.document warner in\n";
+  put ("doc # init_xml_version \"" ^
+       String.escaped (d # xml_version) ^ "\";\n");
+  write_local_dtd out (d # dtd);
+  put "let dtd = mkdtd warner in\n";
+  put "let root = ";
+  write_local_subtree out (d # root);
+  put " in\n";
+  put "doc # init_root root;\n";
+
+  (* Add processing instructions: *)
+  List.iter
+    (fun target -> List.iter put_pinstr (d # pinstr target))
+    (List.sort Pervasives.compare (d # pinstr_names));
+
+  (* Return the result: *)
+  put "doc in\n"
+;;
+
+
+let write_helpers out =
+  (* Outputs the local helper functions add_node, add_pinstr and
+   * local_validate that the generated subtree terms refer to. Each helper
+   * is a "let ... in" binding, so the output must be followed by an
+   * expression.
+   *)
+  List.iter
+    (output_string out)
+    [ "let add_node t n = (t : 'ext Pxp_document.node) # add_node (n : 'ext Pxp_document.node) in\n";
+      "let add_pinstr t pi = (t : 'ext Pxp_document.node) # add_pinstr (pi : Pxp_dtd.proc_instruction) in\n";
+      "let local_validate t = (t : 'ext Pxp_document.node) # local_validate ()in\n"
+    ]
+;;
+
+
+let write_document out d =
+  (* Outputs the top-level definition
+   *   let create_document warner spec = ...;;
+   * consisting of the helper functions, the local function mkdoc, and
+   * the call of mkdoc.
+   *)
+  let put = output_string out in
+  put "let create_document warner spec =\n";
+  write_helpers out;
+  write_local_document out d;
+  put "mkdoc warner spec;;\n"
+;;
+
+
+let write_dtd out dtd =
+  (* Outputs the top-level definition
+   *   let create_dtd warner = ...;;
+   * consisting of the local function mkdtd and the call of mkdtd.
+   *)
+  let put = output_string out in
+  put "let create_dtd warner =\n";
+  write_local_dtd out dtd;
+  put "mkdtd warner;;\n"
+;;
+
+
+let write_subtree out t =
+  (* Outputs the top-level definition
+   *   let create_subtree dtd spec = ...;;
+   * to 'out'.
+   *
+   * write_local_subtree only writes a term (ending in the node 't'), not
+   * a definition of mktree. The term is therefore wrapped into
+   * "let mktree dtd spec = <term> in" here, so that the final call
+   * "mktree dtd spec" refers to a bound name. This mirrors the
+   * mkdoc/mkdtd structure of write_document and write_dtd.
+   *)
+  output_string out "let create_subtree dtd spec =\n";
+  write_helpers out;
+  output_string out "let mktree dtd spec =\n";
+  write_local_subtree out t;
+  output_string out "in\n";
+  output_string out "mktree dtd spec;;\n"
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/30 15:48:07 gerd
+ * Minor update.
+ *
+ * Revision 1.6 2000/08/18 20:16:59 gerd
+ * Updates because of new node types T_comment, T_pinstr, T_super_root.
+ *
+ * Revision 1.5 2000/07/23 02:16:51 gerd
+ * Changed signature of local_validate.
+ *
+ * Revision 1.4 2000/07/09 17:59:35 gerd
+ * Updated: The position of element nodes is also written.
+ *
+ * Revision 1.3 2000/07/09 00:30:00 gerd
+ * Notations are written before they are used.
+ * Unparsed entities are included.
+ * Further changes.
+ *
+ * Revision 1.2 2000/07/08 22:59:14 gerd
+ * [Merging 0.2.10:] Improved: The resulting code can be compiled
+ * faster, and the compiler is less hungry on memory.
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.ml:
+ *
+ * Revision 1.1 2000/03/11 22:57:28 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_document
+open Pxp_yacc
+open Pxp_dtd
+
+val write_document : out_channel -> 'ext document -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh document which is equal to the passed document:
+ *
+ * "let create_document warner spec = ...;;"
+ *
+ * If you compile the code and call "create_document warner spec" the
+ * function creates a document tree which is (almost) equal to the
+ * passed document.
+ *
+ * The following properties may not be equal:
+ * - Parsed entities
+ * - Whether a declaration occurs in an external entity or not
+ *
+ * 'warner': a collect_warnings object
+ * 'spec': a Pxp_document.spec
+ *)
+
+
+val write_dtd : out_channel -> dtd -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh DTD which is equal to the passed DTD:
+ *
+ * "let create_dtd warner = ...;;"
+ *
+ * If you compile the code and call "create_dtd warner" the
+ * function creates a DTD object which is (almost) equal to the
+ * passed object.
+ *
+ * The following properties may not be equal:
+ * - Parsed entities (of the general entities, only the unparsed ones,
+ * i.e. the NDATA entities, are written)
+ * - Whether a declaration occurs in an external entity or not
+ *
+ * 'warner': a collect_warnings object
+ *)
+
+val write_subtree : out_channel -> 'ext node -> unit
+ (* Writes O'Caml code to the out_channel that is a top-level function
+ * creating a fresh node tree which is equal to the passed tree:
+ *
+ * "let create_subtree dtd spec = ...;;"
+ *
+ * If you compile the code and call "create_subtree dtd spec" the
+ * function creates a node tree which is equal to the passed tree.
+ *
+ * 'dtd': a DTD object
+ * 'spec': a Pxp_document.spec
+ *)
+
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/07/09 00:30:14 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_codewriter.mli:
+ *
+ * Revision 1.1 2000/03/11 22:57:28 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* Strings with their usual ordering, as argument for Map.Make *)
+module StringOrd = struct
+  type t = string
+  let compare (a : string) (b : string) = compare a b
+end;;
+
+module StringMap = Map.Make(StringOrd);;
+ (* 'a StringMap.t: the type of maps (dictionaries) from string to 'a *)
+
+module Graph = struct
+  (* A mutable directed graph whose edges are labelled with strings.
+   * For every vertex there is at most one outgoing edge per label.
+   * Every vertex knows the graph it belongs to and has an id that is
+   * unique within this graph.
+   *)
+  type vertex =
+      { mutable edges_out : (string * vertex) list;
+        mutable edges_out_map : vertex StringMap.t;
+        mutable edges_in : (vertex * string) list;
+        mutable graph : graph;
+        mutable id : int;
+      }
+  and graph =
+      { mutable vertexes : vertex list;
+        mutable mid : int;       (* maximum id + 1 *)
+      }
+
+  exception Edge_not_unique
+
+  let create () =
+    { vertexes = []; mid = 0 }
+
+  let new_vertex g =
+    (* The new vertex gets the next free id of g *)
+    let fresh_id = g.mid in
+    let v =
+      { edges_out = [];
+        edges_out_map = StringMap.empty;
+        edges_in = [];
+        graph = g;
+        id = fresh_id;
+      } in
+    g.vertexes <- v :: g.vertexes;
+    g.mid <- fresh_id + 1;
+    v
+
+  let new_edge v_from e v_to =
+    if v_from.graph != v_to.graph then
+      invalid_arg "Pxp_dfa.Graph.new_edge";
+    let existing =
+      try Some (StringMap.find e v_from.edges_out_map)
+      with Not_found -> None
+    in
+    match existing with
+        Some target ->
+          (* Adding the very same edge again is allowed and does nothing *)
+          if target != v_to then
+            raise Edge_not_unique
+      | None ->
+          v_from.edges_out <- (e, v_to) :: v_from.edges_out;
+          v_from.edges_out_map <- StringMap.add e v_to v_from.edges_out_map;
+          v_to.edges_in <- (v_from, e) :: v_to.edges_in
+
+  let graph_of_vertex v = v.graph
+
+  let union g1 g2 =
+    (* The vertexes of g2 are moved to g1; their ids are shifted by the
+     * number of ids already used in g1 such that they remain unique.
+     *)
+    let offset = g1.mid in
+    List.iter
+      (fun v ->
+         v.graph <- g1;
+         v.id <- v.id + offset)
+      g2.vertexes;
+    g1.vertexes <- g2.vertexes @ g1.vertexes;
+    g1.mid <- offset + g2.mid;
+    g2.vertexes <- [];
+    g2.mid <- 0
+
+  let outgoing_edges v = v.edges_out
+
+  let ingoing_edges v = v.edges_in
+
+  let follow_edge v e =
+    StringMap.find e v.edges_out_map  (* or raise Not_found *)
+end
+;;
+
+
+(* Vertexes are ordered by their ids. As ids are only unique within one
+ * graph, it is an error to compare vertexes of different graphs.
+ *)
+module VertexOrd = struct
+  type t = Graph.vertex
+  let compare v1 v2 =
+    if v1.Graph.graph == v2.Graph.graph then
+      compare v1.Graph.id v2.Graph.id
+    else
+      invalid_arg "Pxp_dfa.VertexOrd.compare"
+end
+;;
+
+module VertexSet = Set.Make(VertexOrd);;
+
+
+(* The result of dfa_of_regexp_content_model: an automaton given by its
+ * graph, the start vertex, and the set of accepting vertexes.
+ *)
+type dfa_definition =
+ { dfa_graph : Graph.graph;
+ dfa_start : Graph.vertex; (* Where the automaton starts *)
+ dfa_stops : VertexSet.t; (* Where the automaton may stop *)
+ dfa_null : bool; (* Whether dfa_start member of dfa_stops *)
+ }
+;;
+
+(**********************************************************************)
+
+(* Now that we have all the auxiliary data types, it is time for the
+ * algorithm that transforms regexps to DFAs.
+ *)
+
+open Pxp_types
+
+let dfa_of_regexp_content_model re =
+ (* Computes the automaton for the regular expression 're'.
+ *
+ * The automaton is built bottom-up by the local function get_dfa: every
+ * subexpression gets its own graph, and the graphs are merged with
+ * Graph.union when the subexpressions are combined. New edges are always
+ * added with Graph.new_edge, which raises Graph.Edge_not_unique if a
+ * vertex would get two edges with the same label leading to different
+ * vertexes. This exception is mapped to Not_found at the end of this
+ * function.
+ *
+ * Raises Not_found if the automaton cannot be built that way, and
+ * Invalid_argument if 're' contains an empty Seq or an empty Alt.
+ *)
+ let rec get_dfa re =
+ match re with
+ Child e ->
+ (* Two vertexes connected by one edge labelled e *)
+ let g = Graph.create() in
+ let v1 = Graph.new_vertex g in
+ let v2 = Graph.new_vertex g in
+ Graph.new_edge v1 e v2;
+ { dfa_graph = g;
+ dfa_start = v1;
+ dfa_stops = VertexSet.singleton v2;
+ dfa_null = false;
+ }
+
+ | Seq [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Seq [re'] ->
+ get_dfa re'
+ | Seq (re1 :: seq2) ->
+ (* The sequence is processed as re1 followed by the rest *)
+ let dfa1 = get_dfa re1 in
+ let dfa2 = get_dfa (Seq seq2) in
+ (* Merge the two graphs. The result is in dfa1.dfa_graph: *)
+ Graph.union dfa1.dfa_graph dfa2.dfa_graph;
+ (* Concatenation I: Add additional edges to the graph such
+ * that if w1 matches dfa1, and w2 matches dfa2, and w2 is not
+ * empty, w1w2 will match the merged DFAs.
+ *)
+ List.iter
+ (fun (e,v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa1.dfa_stops
+ )
+ (Graph.outgoing_edges dfa2.dfa_start);
+ (* Concatenation II: If the empty string matches dfa2, the stop
+ * nodes of dfa1 remain stop nodes.
+ *)
+ let stops =
+ if dfa2.dfa_null then
+ VertexSet.union dfa1.dfa_stops dfa2.dfa_stops
+ else
+ dfa2.dfa_stops
+ in
+ (* The resulting DFA: *)
+ { dfa_graph = dfa1.dfa_graph;
+ dfa_start = dfa1.dfa_start;
+ dfa_stops = stops;
+ dfa_null = dfa1.dfa_null && dfa2.dfa_null;
+ }
+
+ | Alt [] ->
+ invalid_arg "Pxp_dfa.dfa_of_regexp_content_model"
+ | Alt [re'] ->
+ get_dfa re'
+ | Alt alt ->
+ let dfa_alt = List.map get_dfa alt in
+ (* Merge the graphs. The result is in g: *)
+ let g = (List.hd dfa_alt).dfa_graph in
+ List.iter
+ (fun dfa ->
+ Graph.union g dfa.dfa_graph
+ )
+ (List.tl dfa_alt);
+ (* Get the new start node: *)
+ let start = Graph.new_vertex g in
+ (* Add the new edges starting at 'start': *)
+ List.iter
+ (fun dfa ->
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa.dfa_start)
+ )
+ dfa_alt;
+ (* If one of the old start nodes was a stop node, the new start
+ * node will be a stop node, too.
+ *)
+ let null = List.exists (fun dfa -> dfa.dfa_null) dfa_alt in
+ let stops =
+ List.fold_left
+ (fun s dfa -> VertexSet.union s dfa.dfa_stops)
+ VertexSet.empty
+ dfa_alt in
+ let stops' =
+ if null then
+ VertexSet.union stops (VertexSet.singleton start)
+ else
+ stops in
+ (* The resulting DFA: *)
+ { dfa_graph = g;
+ dfa_start = start;
+ dfa_stops = stops';
+ dfa_null = null;
+ }
+
+ | Optional re' ->
+ let dfa' = get_dfa re' in
+ if dfa'.dfa_null then
+ (* simple case *)
+ dfa'
+ else begin
+ (* Optimization possible: case ingoing_edges dfa_start = [] *)
+ (* A new start node is created that has the same outgoing edges
+ * as the old one, and that is additionally a stop node.
+ *)
+ let start = Graph.new_vertex dfa'.dfa_graph in
+ List.iter
+ (fun (e, v) ->
+ Graph.new_edge start e v)
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = start;
+ dfa_stops = VertexSet.union dfa'.dfa_stops
+ (VertexSet.singleton start);
+ dfa_null = true;
+ }
+ end
+
+ | Repeated1 re' ->
+ let dfa' = get_dfa re' in
+ (* Every stop node gets the outgoing edges of the start node, so
+ * the automaton can begin the next repetition after a stop node
+ * has been reached.
+ *)
+ List.iter
+ (fun (e, v') ->
+ VertexSet.iter
+ (fun v ->
+ Graph.new_edge v e v')
+ dfa'.dfa_stops
+ )
+ (Graph.outgoing_edges dfa'.dfa_start);
+
+ (* The resulting DFA: *)
+ { dfa_graph = dfa'.dfa_graph;
+ dfa_start = dfa'.dfa_start;
+ dfa_stops = dfa'.dfa_stops;
+ dfa_null = dfa'.dfa_null;
+ }
+
+ | Repeated re' ->
+ (* re'* is treated as (re'+)? *)
+ get_dfa (Optional (Repeated1 re'))
+
+ in
+ try
+ get_dfa re
+ with
+ Graph.Edge_not_unique -> raise Not_found
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/07/23 02:16:08 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+module Graph : sig
+ type graph
+ type vertex
+
+ (* A directed graph whose edges are marked with strings (= element types)
+ * and with the constraint that for a given vertex and a given element
+ * type the edge must be unique.
+ *)
+
+ exception Edge_not_unique
+
+ val create : unit -> graph
+ (* Creates an empty graph *)
+
+ val new_vertex : graph -> vertex
+ (* Adds a new vertex to the graph, and returns the vertex *)
+
+ val new_edge : vertex -> string -> vertex -> unit
+ (* new_edge v_from etype v_to:
+ * Adds a new edge from vertex v_from to vertex v_to, marked with
+ * etype. Nothing happens if exactly this edge already exists.
+ * Raises Edge_not_unique if there is already an edge etype starting
+ * at v_from to a different vertex than v_to.
+ * Raises Invalid_argument if v_from and v_to are not members of the
+ * same graph.
+ *)
+
+ val graph_of_vertex : vertex -> graph
+ (* Returns the graph the passed vertex is contained in. *)
+
+ val union : graph -> graph -> unit
+ (* union g1 g2:
+ * Moves the vertexes and edges found in g2 to g1.
+ * After that, g2 is empty again.
+ *)
+
+ val outgoing_edges : vertex -> (string * vertex) list
+ (* Returns the list of outgoing edges starting in the passed vertex *)
+
+ val follow_edge : vertex -> string -> vertex
+ (* Follows the edge starting in the passed vertex which is marked
+ * with the passed element type.
+ * Raises Not_found if there is no such edge.
+ *)
+
+ val ingoing_edges : vertex -> (vertex * string) list
+ (* Returns the list of ingoing edges ending in the passed vertex *)
+end
+
+module VertexSet : Set.S with type elt = Graph.vertex
+ (* Sets of vertexes. All members of a set must belong to the same graph;
+ * comparing vertexes of different graphs raises Invalid_argument.
+ *)
+
+
+(* The description of an automaton as computed by
+ * dfa_of_regexp_content_model.
+ *)
+type dfa_definition =
+ { dfa_graph : Graph.graph;
+ dfa_start : Graph.vertex; (* Where the automaton starts *)
+ dfa_stops : VertexSet.t; (* Where the automaton may stop *)
+ dfa_null : bool; (* Whether dfa_start member of dfa_stops *)
+ }
+
+val dfa_of_regexp_content_model : Pxp_types.regexp_spec -> dfa_definition
+ (* Computes the DFA or raises Not_found if it does not exist.
+ * Raises Invalid_argument if the regular expression contains an empty
+ * Seq or an empty Alt.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/07/23 02:16:08 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_aux
+open Pxp_dfa
+
+
+(* NOTE(review): this exception is neither raised nor caught in the part
+ * of the module reviewed here; its purpose is to be confirmed.
+ *)
+exception Skip
+
+(* The kinds of nodes. The string argument of T_element is the element
+ * type (it is compared with the names of the content model when
+ * validating); the string argument of T_pinstr is the target of the
+ * processing instruction.
+ * NOTE(review): T_none, T_attribute and T_namespace are not used by the
+ * code visible here; the code writer rejects them by assertion.
+ *)
+type node_type =
+ T_element of string
+ | T_data
+ | T_super_root
+ | T_pinstr of string
+ | T_comment
+ | T_none
+ | T_attribute of string
+ | T_namespace of string
+;;
+
+
+(* The interface of the extension object that is attached to every node.
+ * node_impl calls set_node from its initializer to tell the extension
+ * which node it belongs to; when a node is cloned, the extension is
+ * cloned, too, and set_node is called for the clone.
+ *)
+class type ['node] extension =
+ object ('self)
+ method clone : 'self
+ method node : 'node
+ method set_node : 'node -> unit
+ end
+;;
+
+
+(* The interface of the nodes of the document tree. The parameter 'ext is
+ * the type of the extension object attached to the node.
+ *)
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+ (* The extension object, and tree navigation/modification. In node_impl,
+ * parent and node_position raise Not_found if the node has no parent.
+ *)
+ method extension : 'ext
+ method delete : unit
+ method parent : 'ext node
+ method root : 'ext node
+ method orphaned_clone : 'self
+ method orphaned_flat_clone : 'self
+ method add_node : ?force:bool -> 'ext node -> unit
+ method add_pinstr : proc_instruction -> unit
+ method pinstr : string -> proc_instruction list
+ method pinstr_names : string list
+ method node_position : int
+ method node_path : int list
+ method sub_nodes : 'ext node list
+ method iter_nodes : ('ext node -> unit) -> unit
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ method nth_node : int -> 'ext node
+ method previous_node : 'ext node
+ method next_node : 'ext node
+ method set_nodes : 'ext node list -> unit
+ (* Contents of the node: *)
+ method data : string
+ method node_type : node_type
+ method position : (string * int * int)
+ method attribute : string -> att_value
+ method attribute_names : string list
+ method attribute_type : string -> att_type
+ method attributes : (string * Pxp_types.att_value) list
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
+ method id_attribute_name : string
+ method id_attribute_value : string
+ method idref_attribute_names : string list
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ method attributes_as_nodes : 'ext node list
+ method set_comment : string option -> unit
+ method comment : string option
+ method dtd : dtd
+ method encoding : rep_encoding
+ (* Creation of new nodes, validation, and output: *)
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list -> 'ext node
+ method create_data : dtd -> string -> 'ext node
+ method local_validate : ?use_dfa:bool -> unit -> unit
+ method keep_always_whitespace_mode : unit
+ method write : output_stream -> encoding -> unit
+ method write_compact_as_latin1 : output_stream -> unit
+ (* Internal methods. In node_impl, internal_adopt sets the parent and
+ * the position of the node, internal_set_pos sets only the position,
+ * and internal_delete is called on the parent by 'delete'.
+ *)
+ method internal_adopt : 'ext node option -> int -> unit
+ method internal_set_pos : int -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : (string * int * int) ->
+ dtd -> string -> (string * string) list -> unit
+ method internal_init_other : (string * int * int) ->
+ dtd -> node_type -> unit
+ end
+;;
+
+(* The table of exemplar nodes as filled by make_spec_from_mapping.
+ * NOTE(review): presumably the exemplars are cloned when nodes are
+ * created for a spec; the creating functions are not visible here.
+ *)
+type 'ext spec_table =
+ { mapping : (string, 'ext node) Hashtbl.t; (* element_mapping *)
+ data_node : 'ext node; (* data_exemplar *)
+ default_element : 'ext node; (* default_element_exemplar *)
+ super_root_node : 'ext node option; (* super_root_exemplar *)
+ pinstr_mapping : (string, 'ext node) Hashtbl.t;
+ default_pinstr_node : 'ext node option; (* default_pinstr_exemplar *)
+ comment_node : 'ext node option; (* comment_exemplar *)
+ }
+;;
+
+(* A specification is currently always given as table of exemplars. *)
+type 'ext spec =
+ Spec_table of 'ext spec_table
+;;
+
+
+let make_spec_from_mapping
+      ?super_root_exemplar
+      ?comment_exemplar
+      ?default_pinstr_exemplar
+      ?pinstr_mapping
+      ~data_exemplar ~default_element_exemplar ~element_mapping () =
+  (* Creates a spec from the passed exemplars and hash tables. The hash
+   * tables are stored as they are (not copied). If ~pinstr_mapping is
+   * omitted, a new empty table is used instead.
+   *)
+  let pinstr_table =
+    match pinstr_mapping with
+        Some table -> table
+      | None       -> Hashtbl.create 1
+  in
+  Spec_table
+    { mapping = element_mapping;
+      data_node = data_exemplar;
+      default_element = default_element_exemplar;
+      super_root_node = super_root_exemplar;
+      pinstr_mapping = pinstr_table;
+      default_pinstr_node = default_pinstr_exemplar;
+      comment_node = comment_exemplar;
+    }
+;;
+
+
+let make_spec_from_alist
+      ?super_root_exemplar
+      ?comment_exemplar
+      ?default_pinstr_exemplar
+      ?(pinstr_alist = [])
+      ~data_exemplar ~default_element_exemplar ~element_alist () =
+  (* Creates a spec like make_spec_from_mapping, but the mappings from
+   * names to exemplars are passed as association lists. The lists are
+   * converted to hash tables; the pairs are added in list order with
+   * Hashtbl.add, so for a name occurring several times the last pair
+   * hides the earlier ones.
+   *)
+  let table_of_alist alist =
+    (* Every table is sized by the length of its OWN list. (The size is
+     * only a hint, but taking it from the wrong list makes the element
+     * table minimal whenever there are no PI exemplars.)
+     *)
+    let table = Hashtbl.create (List.length alist) in
+    List.iter
+      (fun (name,ex) -> Hashtbl.add table name ex)
+      alist;
+    table
+  in
+  let pinstr_mapping = table_of_alist pinstr_alist in
+  let element_mapping = table_of_alist element_alist in
+  make_spec_from_mapping
+    ?super_root_exemplar: super_root_exemplar
+    ?comment_exemplar: comment_exemplar
+    ?default_pinstr_exemplar: default_pinstr_exemplar
+    ~pinstr_mapping: pinstr_mapping
+    ~data_exemplar: data_exemplar
+    ~default_element_exemplar: default_element_exemplar
+    ~element_mapping: element_mapping
+    ()
+;;
+
+(**********************************************************************)
+
+(* Raised by the backtracking matcher of validate_content as soon as a
+ * match has been found; caught by validate_content itself.
+ *)
+exception Found;;
+
+let validate_content ?(use_dfa=None) model (el : 'a node) =
+ (* checks that the nodes of 'el' matches the DTD. Returns 'true'
+ * on success and 'false' on failure.
+ *
+ * use_dfa: If 'Some dfa' is passed, Regexp models are checked by
+ * executing this automaton; otherwise (None, the default) the
+ * backtracking algorithm is used. Ignored for all other models.
+ * model: the content model to check against
+ * el: the node whose sub nodes are checked
+ *
+ * Only element nodes are significant for the check; all other sub nodes
+ * (data, comments, processing instructions...) are skipped.
+ *)
+
+ let rec is_empty cl =
+ (* Whether the node list counts as empty or not. *)
+ match cl with
+ [] -> true
+ | n :: cl' ->
+ ( match n # node_type with
+ | T_element _ -> false
+ | _ -> is_empty cl' (* ignore other nodes *)
+ )
+ in
+
+ let rec run_regexp cl ml =
+ (* Validates regexp content models ml against instances cl. This
+ * function works for deterministic and non-deterministic models.
+ * The implementation uses backtracking and may sometimes be slow.
+ *
+ * cl: the list of children that will have to be matched
+ * ml: the list of regexps that will have to match (to be read as
+ * sequence)
+ * returns () meaning that no match has been found, or raises Found.
+ *)
+ match ml with
+ [] ->
+ if cl = [] then raise Found; (* Frequent case *)
+ if is_empty cl then raise Found; (* General condition *)
+ | Seq seq :: ml' ->
+ assert (seq <> []); (* necessary to ensure termination *)
+ run_regexp cl (seq @ ml')
+ | Alt alts :: ml' ->
+ (* Try the alternatives one after another *)
+ let rec find alts =
+ match alts with
+ [] -> ()
+ | alt :: alts' ->
+ run_regexp cl (alt :: ml');
+ find alts'
+ in
+ assert (alts <> []); (* Alt [] matches nothing *)
+ find alts
+ | Repeated re :: ml' ->
+ let rec norm re = (* to avoid infinite loops *)
+ match re with
+ Repeated subre -> norm subre (* necessary *)
+ | Optional subre -> norm subre (* necessary *)
+ | Repeated1 subre -> norm subre (* an optimization *)
+ | _ -> re
+ in
+ let re' = norm re in
+ (* First try one more repetition, then try to go on without *)
+ run_regexp cl (re' :: Repeated re' :: ml');
+ run_regexp cl ml'
+ | Repeated1 re :: ml' ->
+ run_regexp cl (re :: Repeated re :: ml')
+ | Optional re :: ml' ->
+ run_regexp cl (re :: ml');
+ run_regexp cl ml';
+ | Child chld :: ml' ->
+ match cl with
+ [] ->
+ ()
+ | sub_el :: cl' ->
+ begin match sub_el # node_type with
+ T_data -> (* Ignore data *)
+ run_regexp cl' ml
+ (* Note: It can happen that we find a data node here
+ * if the 'keep_always_whitespace' mode is turned on.
+ *)
+ | T_element nt ->
+ if nt = chld then run_regexp cl' ml'
+ | _ -> (* Ignore this element *)
+ run_regexp cl' ml
+ end
+ in
+
+ let run_dfa cl dfa =
+ (* Validates regexp content models ml against instances cl. This
+ * function works ONLY for deterministic models.
+ * The implementation executes the automaton.
+ * Returns whether the automaton accepts the element nodes of cl.
+ *)
+ let current_vertex = ref dfa.dfa_start in
+ let rec next_step cl =
+ match cl with
+ el :: cl' ->
+ begin match el # node_type with
+ T_data -> (* Ignore data *)
+ next_step cl'
+ (* Note: It can happen that we find a data node here
+ * if the 'keep_always_whitespace' mode is turned on.
+ *)
+ | T_element nt ->
+ begin try
+ current_vertex := Graph.follow_edge !current_vertex nt;
+ next_step cl'
+ with
+ Not_found -> false
+ end
+ | _ -> (* Ignore this node *)
+ next_step cl'
+ end
+ | [] ->
+ VertexSet.mem !current_vertex dfa.dfa_stops
+ in
+ next_step cl
+ in
+
+ match model with
+ Unspecified -> true
+ | Any -> true
+ | Empty ->
+ let cl = el # sub_nodes in
+ is_empty cl
+ | Mixed (MPCDATA :: mix) ->
+ (* mix': the names of the elements that are allowed as children *)
+ let mix' = List.map (function
+ MPCDATA -> assert false
+ | MChild x -> x)
+ mix in
+ begin try
+ el # iter_nodes
+ (fun sub_el ->
+ let nt = sub_el # node_type in
+ match nt with
+ | T_element name ->
+ if not (List.mem name mix') then raise Not_found;
+ | _ -> ()
+ );
+ true
+ with
+ Not_found ->
+ false
+ end
+ | Regexp re ->
+ let cl = el # sub_nodes in
+ begin match use_dfa with
+ None ->
+ (* General backtracking implementation: *)
+ begin try
+ run_regexp cl [re];
+ false
+ with
+ Found -> true
+ end
+ | Some dfa ->
+ run_dfa cl dfa
+ end
+
+ (* Reached for Mixed models that do not begin with MPCDATA: *)
+ | _ -> assert false
+;;
+
+(**********************************************************************)
+
+
+class virtual ['ext] node_impl an_ext =
+  (* Common base of the node classes of this file. It stores the link to
+   * the parent, the position among the siblings, the DTD and the
+   * extension object, and derives the navigation methods from these.
+   * Everything that concerns content and attributes is declared virtual.
+   *)
+  object (self)
+    constraint 'ext = 'ext node #extension
+
+    val mutable parent = (None : 'ext node option)
+    val mutable node_position = -1
+    val mutable dtd = (None : dtd option)
+    val mutable extension = an_ext
+
+    initializer
+      (* Tell the extension which node it is attached to. *)
+      extension # set_node (self : 'ext #node :> 'ext node)
+
+
+    method extension = (extension : 'ext)
+
+    (* Asks the parent to remove this node; nothing happens for a node
+     * without parent.
+     *)
+    method delete =
+      match parent with
+        Some owner -> owner # internal_delete (self : 'ext #node :> 'ext node)
+      | None -> ()
+
+    (* Raises Not_found if there is no parent. *)
+    method parent =
+      match parent with
+        Some owner -> owner
+      | None -> raise Not_found
+
+    (* Follows the parent links upwards; a node without parent is its own
+     * root.
+     *)
+    method root =
+      match parent with
+        Some owner -> owner # root
+      | None -> (self : 'ext #node :> 'ext node)
+
+    (* Raises Not_found while the stored position is negative. *)
+    method node_position =
+      if node_position < 0 then raise Not_found;
+      node_position
+
+    (* Collects the positions on the way from the root down to this node.
+     * The climb stops as soon as a position or a parent is missing.
+     *)
+    method node_path =
+      let rec climb node acc =
+        try
+          let idx = node # node_position in
+          climb (node # parent) (idx :: acc)
+        with
+          Not_found ->
+            (* node is the root *)
+            acc
+      in
+      climb (self : 'ext #node :> 'ext node) []
+
+    method previous_node =
+      let idx = self # node_position in
+      self # parent # nth_node (idx - 1)
+
+    method next_node =
+      let idx = self # node_position in
+      self # parent # nth_node (idx + 1)
+
+    (* Copy of this object with a cloned extension, no parent and no
+     * position.
+     *)
+    method orphaned_clone =
+      let ext' = extension # clone in
+      let copy =
+        {< parent = None;
+           node_position = -1;
+           extension = ext';
+        >} in
+      ext' # set_node (copy : 'ext #node :> 'ext node);
+      copy
+
+    (* In this base class the flat clone is built exactly like the
+     * orphaned clone.
+     *)
+    method orphaned_flat_clone =
+      let ext' = extension # clone in
+      let copy =
+        {< parent = None;
+           node_position = -1;
+           extension = ext';
+        >} in
+      ext' # set_node (copy : 'ext #node :> 'ext node);
+      copy
+
+    method dtd =
+      match dtd with
+        Some d -> d
+      | None -> failwith "Pxp_document.node_impl#dtd: No DTD available"
+
+    method encoding =
+      match dtd with
+        Some d -> d # encoding
+      | None -> failwith "Pxp_document.node_impl#encoding: No DTD available"
+
+    (* Stores the new parent and position. A node that already has a
+     * parent must first be released (new_parent = None) before it can be
+     * bound again.
+     *)
+    method internal_adopt (new_parent : 'ext node option) pos =
+      (match parent, new_parent with
+         Some _, Some _ ->
+           failwith "Pxp_document.node_impl#internal_adopt: Tried to add a bound element"
+       | _ ->
+           ());
+      parent <- new_parent;
+      node_position <- pos
+
+    method internal_set_pos pos =
+      node_position <- pos
+
+    method virtual add_node : ?force:bool -> 'ext node -> unit
+    method virtual add_pinstr : proc_instruction -> unit
+    method virtual sub_nodes : 'ext node list
+    method virtual pinstr : string -> proc_instruction list
+    method virtual pinstr_names : string list
+    method virtual iter_nodes : ('ext node -> unit) -> unit
+    method virtual iter_nodes_sibl : ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+    method virtual nth_node : int -> 'ext node
+    method virtual set_nodes : 'ext node list -> unit
+    method virtual data : string
+    method virtual node_type : node_type
+    method virtual position : (string * int * int)
+    method virtual attribute : string -> att_value
+    method virtual attribute_names : string list
+    method virtual attribute_type : string -> att_type
+    method virtual attributes : (string * Pxp_types.att_value) list
+    method virtual required_string_attribute : string -> string
+    method virtual required_list_attribute : string -> string list
+    method virtual optional_string_attribute : string -> string option
+    method virtual optional_list_attribute : string -> string list
+    method virtual quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+    method virtual attributes_as_nodes : 'ext node list
+    method virtual set_comment : string option -> unit
+    method virtual comment : string option
+    method virtual create_element :
+      ?position:(string * int * int) ->
+      dtd -> node_type -> (string * string) list -> 'ext node
+    method virtual create_data : dtd -> string -> 'ext node
+    method virtual keep_always_whitespace_mode : unit
+    method virtual write : output_stream -> encoding -> unit
+    method virtual write_compact_as_latin1 : output_stream -> unit
+    method virtual local_validate : ?use_dfa:bool -> unit -> unit
+    method virtual internal_delete : 'ext node -> unit
+    method virtual internal_init : (string * int * int) ->
+      dtd -> string -> (string * string) list -> unit
+    method virtual internal_init_other : (string * int * int) ->
+      dtd -> node_type -> unit
+  end
+;;
+
+
+(**********************************************************************)
+
+(* Placeholder position triple. It is returned by data nodes, it is the
+ * initial position of element nodes, and it is the default of the
+ * ?position argument of create_element.
+ * NOTE(review): the components are presumably (entity name, line,
+ * column) - confirm against the parser that produces positions.
+ *)
+let no_position = ("?", 0, 0) ;;
+
+
+class ['ext] data_impl an_ext : ['ext] node =
+  (* Node implementation for character data (node type T_data). Such a
+   * node carries a string and nothing else: it has no sub nodes, no
+   * attributes, no processing instructions and no comment. Methods that
+   * would modify these fail, the corresponding accessors return empty
+   * results or raise Not_found.
+   *)
+  object (self)
+    inherit ['ext] node_impl an_ext
+    val mutable content = ("" : string)
+
+    method position = no_position
+
+    method add_node ?(force=false) _ =
+      failwith "method 'add_node' not applicable to data node"
+    method add_pinstr _ =
+      failwith "method 'add_pinstr' not applicable to data node"
+    method pinstr _ = []
+    method pinstr_names = []
+    method sub_nodes = []
+    method iter_nodes _ = ()
+    method iter_nodes_sibl _ = ()
+    method nth_node _ = raise Not_found
+    method set_nodes _ =
+      failwith "method 'set_nodes' not applicable to data node"
+    method data = content
+    method node_type = T_data
+    method attribute _ = raise Not_found
+    method attribute_names = []
+    method attribute_type _ = raise Not_found
+    method attributes = []
+    method required_string_attribute _ =
+      failwith "Markup.document, method required_string_attribute: not found"
+    method required_list_attribute _ =
+      failwith "Markup.document, method required_list_attribute: not found"
+    method optional_string_attribute _ = None
+    method optional_list_attribute _ = []
+    method id_attribute_name = raise Not_found
+    method id_attribute_value = raise Not_found
+    method idref_attribute_names = []
+    method quick_set_attributes _ =
+      failwith "method 'quick_set_attributes' not applicable to data node"
+    method attributes_as_nodes = []
+    method comment = None
+
+    (* Clearing the comment (None) is accepted as a no-op; setting one
+     * fails.
+     *)
+    method set_comment c =
+      match c with
+        None -> ()
+      | Some _ -> failwith "method 'set_comment' not applicable to data node"
+    method create_element ?position _ _ _ =
+      failwith "method 'create_element' not applicable to data node"
+
+    (* Creates a new data node from this exemplar: a copy of this object
+     * with a cloned extension, the given DTD and the given string, and
+     * without parent. The position is reset, too, so that a node created
+     * from an exemplar that is itself part of a tree does not report the
+     * sibling position of the exemplar (orphaned_clone resets it in the
+     * same way).
+     *)
+    method create_data new_dtd new_str =
+      let x = extension # clone in
+      let n =
+        ( {< parent = None;
+             node_position = -1;
+             extension = x;
+             dtd = Some new_dtd;
+             content = new_str;
+          >}
+          : 'ext #node :> 'ext node) in
+      x # set_node n;
+      n
+
+    (* Data nodes have no content model of their own, nothing to check. *)
+    method local_validate ?use_dfa () = ()
+    method keep_always_whitespace_mode = ()
+
+
+    (* Writes the string with write_data_string, converting from the
+     * encoding of the DTD to enc.
+     *)
+    method write os enc =
+      let encoding = self # encoding in
+      write_data_string ~from_enc:encoding ~to_enc:enc os content
+
+
+    method write_compact_as_latin1 os =
+      self # write os `Enc_iso88591
+
+    (* The internal_* methods below are not expected to be called for
+     * data nodes.
+     *)
+    method internal_delete _ =
+      assert false
+    method internal_init _ _ _ _ =
+      assert false
+    method internal_init_other _ _ _ =
+      assert false
+  end
+;;
+
+
+(**********************************************************************)
+
+class ['ext] attribute_impl ~element ~name value dtd =
+  (* Node wrapper around a single attribute (node type T_attribute).
+   * element is the name of the element type the attribute belongs to,
+   * name and value are the attribute itself, and dtd is used to look up
+   * the declared attribute type. Only the parent link and the attribute
+   * accessors are meaningful; the remaining node methods either return
+   * empty results or fail with a "not applicable" message.
+   *)
+  (object (self)
+     val mutable parent = (None : 'ext node option)
+     val mutable dtd = dtd
+     val mutable element_name = element
+     val mutable att_name = name
+     val mutable att_value = value
+
+     (* Raises Not_found if there is no parent. *)
+     method parent =
+       match parent with
+         None -> raise Not_found
+       | Some p -> p
+
+     method root =
+       match parent with
+         None -> (self : 'ext #node :> 'ext node)
+       | Some p -> p # root
+
+     (* The position argument is ignored, attribute nodes have none. *)
+     method internal_adopt new_parent _ =
+       parent <- new_parent
+
+     method orphaned_clone =
+       {< parent = None >}
+
+     method orphaned_flat_clone =
+       {< parent = None >}
+
+     method dtd = dtd
+
+     method encoding = dtd # encoding
+
+     method node_type = T_attribute att_name
+
+     method attribute n =
+       if n = att_name then att_value else raise Not_found
+
+     method attribute_names = [ att_name ]
+
+     (* Looks up the type of attribute n in the declaration of the owning
+      * element type; attributes that are not declared there count as
+      * A_cdata.
+      *)
+     method attribute_type n =
+       let eltype = dtd # element element_name in
+       ( try
+           let atype, _ = eltype # attribute n in
+           atype
+         with
+           Undeclared ->
+             A_cdata
+       )
+
+     method attributes = [ att_name, att_value ]
+
+     (* NOTE(review): for an implied value the two required_* methods
+      * raise Not_found, whereas a wrong name yields Failure. The element
+      * implementation reports both cases as Failure - confirm which one
+      * is intended before relying on it.
+      *)
+     method required_string_attribute n =
+       if n = att_name then
+         match att_value with
+           Value s -> s
+         | Valuelist l -> String.concat " " l
+         | Implied_value -> raise Not_found
+       else
+         failwith "Pxp_document.attribute_impl#required_string_attribute: not found"
+
+
+     method required_list_attribute n =
+       if n = att_name then
+         match att_value with
+           Value s -> [ s ]
+         | Valuelist l -> l
+         | Implied_value -> raise Not_found
+       else
+         failwith "Pxp_document.attribute_impl#required_list_attribute: not found"
+
+     method optional_string_attribute n =
+       if n = att_name then
+         match att_value with
+           Value s -> Some s
+         | Valuelist l -> Some(String.concat " " l)
+         | Implied_value -> None
+       else
+         None
+
+     method optional_list_attribute n =
+       if n = att_name then
+         match att_value with
+           Value s -> [ s ]
+         | Valuelist l -> l
+         | Implied_value -> []
+       else
+         []
+
+     (* Senseless methods: *)
+
+     method sub_nodes = []
+     method pinstr _ = []
+     method pinstr_names = []
+     method iter_nodes _ = ()
+     method iter_nodes_sibl _ = ()
+     method nth_node _ = raise Not_found
+     method data = ""
+     method position = ("?",0,0)
+     method comment = None
+     method local_validate ?use_dfa () = ()
+
+     (* Non-applicable methods: *)
+
+     method extension =
+       failwith "Pxp_document.attribute_impl#extension: not applicable"
+     method delete =
+       failwith "Pxp_document.attribute_impl#delete: not applicable"
+     method node_position =
+       failwith "Pxp_document.attribute_impl#node_position: not applicable"
+     method node_path =
+       failwith "Pxp_document.attribute_impl#node_path: not applicable"
+     method previous_node =
+       failwith "Pxp_document.attribute_impl#previous_node: not applicable"
+     method next_node =
+       failwith "Pxp_document.attribute_impl#next_node: not applicable"
+     method internal_set_pos _ =
+       failwith "Pxp_document.attribute_impl#internal_set_pos: not applicable"
+     method internal_delete _ =
+       failwith "Pxp_document.attribute_impl#internal_delete: not applicable"
+     method internal_init _ _ _ _ =
+       failwith "Pxp_document.attribute_impl#internal_init: not applicable"
+     method internal_init_other _ _ _ =
+       failwith "Pxp_document.attribute_impl#internal_init_other: not applicable"
+     method add_node ?force _ =
+       failwith "Pxp_document.attribute_impl#add_node: not applicable"
+     method add_pinstr _ =
+       failwith "Pxp_document.attribute_impl#add_pinstr: not applicable"
+     method set_nodes _ =
+       failwith "Pxp_document.attribute_impl#set_nodes: not applicable"
+     method quick_set_attributes _ =
+       failwith "Pxp_document.attribute_impl#quick_set_attributes: not applicable"
+     method attributes_as_nodes =
+       (* The message used to name a non-existing method
+        * (dattributes_as_nodes).
+        *)
+       failwith "Pxp_document.attribute_impl#attributes_as_nodes: not applicable"
+     method set_comment c =
+       (* Clearing the comment is tolerated, setting one is not. *)
+       if c <> None then
+         failwith "Pxp_document.attribute_impl#set_comment: not applicable"
+     method create_element ?position _ _ _ =
+       failwith "Pxp_document.attribute_impl#create_element: not applicable"
+     method create_data _ _ =
+       failwith "Pxp_document.attribute_impl#create_data: not applicable"
+     method keep_always_whitespace_mode =
+       failwith "Pxp_document.attribute_impl#keep_always_whitespace_mode: not applicable"
+     method write _ _ =
+       failwith "Pxp_document.attribute_impl#write: not applicable"
+     method write_compact_as_latin1 _ =
+       failwith "Pxp_document.attribute_impl#write_compact_as_latin1: not applicable"
+     method id_attribute_name =
+       failwith "Pxp_document.attribute_impl#id_attribute_name: not applicable"
+     method id_attribute_value =
+       failwith "Pxp_document.attribute_impl#id_attribute_value: not applicable"
+     method idref_attribute_names =
+       failwith "Pxp_document.attribute_impl#idref_attribute_names: not applicable"
+   end
+   : ['ext] node)
+;;
+
+(**********************************************************************)
+
+class ['ext] element_impl an_ext : ['ext] node =
+  (* Node implementation for everything that is not character data:
+   * elements, the super root, and the wrapper nodes for processing
+   * instructions and comments (see ntype).
+   *
+   * The sub nodes are kept in reverse order in rev_nodes. The values
+   * nodes (list in document order) and array (for indexed access) are
+   * caches derived from rev_nodes; they must be set to None whenever
+   * rev_nodes changes. size is the number of sub nodes.
+   *)
+  object (self:'self)
+    inherit ['ext] node_impl an_ext as super
+
+    val mutable content_model = Any
+    val mutable content_dfa = lazy None
+    val mutable ext_decl = false
+    val mutable ntype = T_none
+    val mutable id_att_name = None
+    val mutable idref_att_names = []
+    val mutable rev_nodes = ([] : 'c list)
+    val mutable nodes = (None : 'c list option)
+    val mutable array = (None : 'c array option)
+    val mutable size = 0
+    val mutable attributes = []
+    val mutable att_nodes = []
+    val mutable comment = None
+    val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+    val mutable keep_always_whitespace = false
+
+    val mutable position = no_position
+
+    method comment = comment
+
+    (* Only nodes of type T_comment can carry a comment. *)
+    method set_comment c =
+      if ntype = T_comment then
+        comment <- c
+      else
+        failwith "set_comment: not applicable to node types other than T_comment"
+
+    method attributes = attributes
+
+    method position = position
+
+    (* Description of this node for error messages. *)
+    method private error_name =
+      match ntype with
+        T_element n -> "Element `" ^ n ^ "'"
+      | T_super_root -> "Super root"
+      | T_pinstr n -> "Wrapper element for processing instruction `" ^ n ^
+                      "'"
+      | T_comment -> "Wrapper element for comment"
+      | T_none -> "NO element"
+      | T_attribute _ -> assert false
+      | T_namespace _ -> assert false
+      | T_data -> assert false
+
+    (* Appends n to the sub nodes. Data nodes are checked against the
+     * content model first unless force is set: for Empty the data must be
+     * the empty string, for Regexp it must consist of white space only.
+     * In both cases the data node is then silently dropped (exception
+     * Skip), except that white space is kept for Regexp models when
+     * keep_always_whitespace is on.
+     *)
+    method add_node ?(force = false) n =
+      let only_whitespace s =
+        (* Checks that the string s contains only whitespace. On failure,
+         * Validation_error is raised.
+         *)
+        let l = String.length s in
+        if l < 100 then begin
+          for i=0 to l - 1 do  (* for loop is faster for small l *)
+            match s.[i] with
+              ('\009'|'\010'|'\013'|'\032') -> ()
+            | _ ->
+                raise(Validation_error(self # error_name ^
+                                       " must not have character contents"))
+          done
+        end
+        else begin
+          let lexbuf = Lexing.from_string s in
+          let lexerset = Pxp_lexers.get_lexer_set (self # dtd # encoding) in
+          let t = lexerset.scan_name_string lexbuf in
+          if t <> Ignore ||
+             (lexerset.scan_name_string lexbuf <> Eof)
+          then
+            raise(Validation_error(self # error_name ^
+                                   " must not have character contents"));
+          ()
+        end
+      in
+      (* general DTD check: *)
+      begin match dtd with
+        None -> ()
+      | Some d -> if n # dtd != d then
+          failwith "Pxp_document.element_impl # add_node: the sub node has a different DTD";
+      end;
+      (* specific checks: *)
+      try
+        begin match n # node_type with
+          T_data ->
+            begin match content_model with
+              Any -> ()
+            | Unspecified -> ()
+            | Empty ->
+                if not force then begin
+                  if n # data <> "" then
+                    raise(Validation_error(self # error_name ^
+                                           " must be empty"));
+                  raise Skip
+                end
+            | Mixed _ -> ()
+            | Regexp _ ->
+                if not force then begin
+                  only_whitespace (n # data);
+                  (* TODO: following check faster *)
+                  if n # dtd # standalone_declaration &&
+                     n # data <> ""
+                  then begin
+                    (* The standalone declaration is violated if the
+                     * element declaration is contained in an external
+                     * entity.
+                     *)
+                    if ext_decl then
+                      raise
+                        (Validation_error
+                           (self # error_name ^
+                            " violates standalone declaration" ^
+                            " because extra white space separates" ^
+                            " the sub elements"));
+                  end;
+                  if not keep_always_whitespace then raise Skip
+                end
+            end
+        | _ ->
+            ()
+        end;
+        (* all OK, so add this node: *)
+        n # internal_adopt (Some (self : 'ext #node :> 'ext node)) size;
+        rev_nodes <- n :: rev_nodes;
+        nodes <- None;
+        array <- None;
+        size <- size + 1
+      with Skip ->
+        ()
+
+    (* Several instructions with the same target may be added; they are
+     * all kept.
+     *)
+    method add_pinstr pi =
+      begin match dtd with
+        None -> ()
+      | Some d ->
+          if pi # encoding <> d # encoding then
+            failwith "Pxp_document.element_impl # add_pinstr: Inconsistent encodings";
+      end;
+      let name = pi # target in
+      Hashtbl.add (Lazy.force pinstr) name pi
+
+    method pinstr name =
+      Hashtbl.find_all (Lazy.force pinstr) name
+
+    (* A target that was added several times is listed several times. *)
+    method pinstr_names =
+      let l = ref [] in
+      Hashtbl.iter
+        (fun n _ -> l := n :: !l)
+        (Lazy.force pinstr);
+      !l
+
+    (* Sub nodes in document order; the reversed list is cached. *)
+    method sub_nodes =
+      match nodes with
+        None ->
+          let cl = List.rev rev_nodes in
+          nodes <- Some cl;
+          cl
+      | Some cl ->
+          cl
+
+    method iter_nodes f =
+      let cl = self # sub_nodes in
+      List.iter f cl
+
+    (* Calls f pred n succ for every sub node n, where pred and succ are
+     * the neighbours of n (None at the ends of the list).
+     *)
+    method iter_nodes_sibl f =
+      let cl = self # sub_nodes in
+      let rec next last_node l =
+        match l with
+          [] -> ()
+        | [x] ->
+            f last_node x None
+        | x :: (y :: _ as rest) ->
+            f last_node x (Some y);
+            (* Continue with the list starting at y. Continuing behind y
+             * would skip every second node.
+             *)
+            next (Some x) rest
+      in
+      next None cl
+
+    (* Raises Not_found if p is not a valid index. The array used for the
+     * access is built on demand and cached.
+     *)
+    method nth_node p =
+      if p < 0 || p >= size then raise Not_found;
+      match array with
+        Some a ->
+          a.(p)
+      | None ->
+          let a = Array.of_list (self # sub_nodes) in
+          array <- Some a;
+          a.(p)
+
+    (* Replaces all sub nodes by nl. The old sub nodes are released
+     * first; if adopting a member of nl fails, the old sub nodes are
+     * adopted again and the exception is re-raised.
+     *)
+    method set_nodes nl =
+      let old_size = size in
+      List.iter
+        (fun n -> n # internal_adopt None (-1))
+        rev_nodes;
+      begin try
+        size <- 0;
+        List.iter
+          (fun n -> n # internal_adopt
+                      (Some (self : 'ext #node :> 'ext node))
+                      size;
+                    size <- size + 1)
+          nl
+      with
+        e ->
+          (* revert action as much as possible *)
+          List.iter
+            (fun n -> n # internal_adopt None (-1))
+            rev_nodes;
+          size <- old_size;
+          let pos = ref (size-1) in
+          List.iter
+            (fun n -> n # internal_adopt
+                        (Some (self : 'ext #node :> 'ext node))
+                        !pos;
+                      decr pos
+            )
+            rev_nodes;
+          (* [TODO] Note: there may be bad members in nl *)
+          raise e
+      end;
+      rev_nodes <- List.rev nl;
+      array <- None;
+      nodes <- None
+
+
+    (* Deep copy: the sub nodes are cloned recursively and adopted by the
+     * copy. rev_nodes is in reverse order, so positions are handed out
+     * from size - 1 downwards.
+     * NOTE(review): the functional update copies the pinstr value as it
+     * is, so the copy appears to share the table of processing
+     * instructions with the original - confirm whether this is intended.
+     *)
+    method orphaned_clone : 'self =
+      let sub_clones =
+        List.map
+          (fun m ->
+             m # orphaned_clone)
+          rev_nodes
+      in
+
+      let x = extension # clone in
+      let n =
+        {< parent = None;
+           node_position = -1;
+           extension = x;
+           rev_nodes = sub_clones;
+           nodes = None;
+           array = None;
+        >} in
+
+      let pos = ref (size - 1) in
+      List.iter
+        (fun m -> m # internal_adopt
+                    (Some (n : 'ext #node :> 'ext node))
+                    !pos;
+                  decr pos
+        )
+        sub_clones;
+
+      x # set_node (n : 'ext #node :> 'ext node);
+      n
+
+    (* Copy without sub nodes. *)
+    method orphaned_flat_clone : 'self =
+      let x = extension # clone in
+      let n =
+        {< parent = None;
+           node_position = -1;
+           extension = x;
+           rev_nodes = [];
+           nodes = None;
+           size = 0;
+           array = None;
+        >} in
+
+      x # set_node (n : 'ext #node :> 'ext node);
+      n
+
+
+    (* Removes n (compared by physical identity) from the sub nodes,
+     * renumbers the remaining ones and releases n. Both caches are
+     * invalidated; leaving array untouched would let nth_node return
+     * nodes from the state before the deletion. size is recomputed from
+     * the list so that it stays correct even if n was not a sub node.
+     *)
+    method internal_delete n =
+      rev_nodes <- List.filter (fun n' -> n' != n) rev_nodes;
+      size <- List.length rev_nodes;
+      let p = ref (size-1) in
+      List.iter
+        (fun n' -> n' # internal_set_pos !p; decr p)
+        rev_nodes;
+      nodes <- None;
+      array <- None;
+      n # internal_adopt None (-1)
+
+
+    (* Concatenation of the data of all sub nodes. *)
+    method data =
+      let cl = self # sub_nodes in
+      String.concat "" (List.map (fun n -> n # data) cl)
+
+    method node_type = ntype
+
+
+    (* Raises Not_found if there is no attribute n. *)
+    method attribute n =
+      List.assoc n attributes
+
+    method attribute_names =
+      List.map fst attributes
+
+    (* Declared type of attribute n; attributes that are not declared for
+     * this element type count as A_cdata.
+     *)
+    method attribute_type n =
+      match ntype with
+        T_element name ->
+          let d =
+            match dtd with
+              None -> assert false
+            | Some d -> d in
+          let eltype = d # element name in
+          ( try
+              let atype, _ = eltype # attribute n in
+              atype
+            with
+              Undeclared ->
+                A_cdata
+          )
+      | _ ->
+          failwith "attribute_type: not available for non-element nodes"
+
+
+    (* The required_* methods fail if the attribute is missing or
+     * implied; the optional_* methods return None resp. [] then.
+     *)
+    method required_string_attribute n =
+      try
+        match List.assoc n attributes with
+          Value s -> s
+        | Valuelist l -> String.concat " " l
+        | Implied_value -> raise Not_found
+      with
+        Not_found ->
+          failwith "Pxp_document, method required_string_attribute: not found"
+
+    method optional_string_attribute n =
+      try
+        match List.assoc n attributes with
+          Value s -> Some s
+        | Valuelist l -> Some (String.concat " " l)
+        | Implied_value -> None
+      with
+        Not_found ->
+          None
+
+    method required_list_attribute n =
+      try
+        match List.assoc n attributes with
+          Value s -> [ s ]
+        | Valuelist l -> l
+        | Implied_value -> raise Not_found
+      with
+        Not_found ->
+          failwith "Markup.document, method required_list_attribute: not found"
+
+    method optional_list_attribute n =
+      try
+        match List.assoc n attributes with
+          Value s -> [ s ]
+        | Valuelist l -> l
+        | Implied_value -> []
+      with
+        Not_found ->
+          []
+
+    (* Raises Not_found if no ID attribute is known for this node. *)
+    method id_attribute_name =
+      match id_att_name with
+        None -> raise Not_found
+      | Some name -> name
+
+    (* Raises Not_found if there is no ID attribute or if it does not
+     * have a single string value.
+     *)
+    method id_attribute_value =
+      match id_att_name with
+        None -> raise Not_found
+      | Some name ->
+          begin match List.assoc name attributes (* may raise Not_found *)
+          with
+            Value s -> s
+          | _ -> raise Not_found
+          end
+
+
+    method idref_attribute_names = idref_att_names
+
+
+    (* Replaces the attribute list without any check and drops the cached
+     * attribute nodes.
+     *)
+    method quick_set_attributes atts =
+      match ntype with
+        T_element _ ->
+          attributes <- atts;
+          att_nodes <- []
+      | _ ->
+          failwith "quick_set_attributes: not applicable for non-element node"
+
+
+    (* The attributes wrapped into attribute_impl nodes. The list is
+     * created on first use and cached in att_nodes.
+     *)
+    method attributes_as_nodes =
+      match att_nodes with
+        [] when attributes = [] ->
+          []
+      | [] ->
+          let dtd = self # dtd in
+          let element_name =
+            match ntype with
+              T_element n -> n
+            | _ ->
+                assert false in
+          let l =
+            List.map
+              (fun (n,v) ->
+                 new attribute_impl
+                   ~element:element_name
+                   ~name:n
+                   v
+                   dtd)
+              attributes in
+          att_nodes <- l;
+          l
+      | _ ->
+          att_nodes
+
+
+    (* Creates a new node from this exemplar: a copy of this object with
+     * a cloned extension and a fresh table of processing instructions,
+     * initialized by internal_init (elements) or internal_init_other
+     * (comments, processing instructions, super root, T_none). The
+     * sibling position is reset because the new node has no parent.
+     *)
+    method create_element
+             ?(position = no_position) new_dtd new_type new_attlist =
+      let x = extension # clone in
+      let obj = ( {< parent = None;
+                     node_position = -1;
+                     extension = x;
+                     pinstr = lazy (Hashtbl.create 10)
+                  >}
+                  : 'ext #node :> 'ext node
+                ) in
+      x # set_node obj;
+      match new_type with
+        T_data ->
+          failwith "create_element: Cannot create T_data node"
+      | T_element name ->
+          obj # internal_init position new_dtd name new_attlist;
+          obj
+      | (T_comment | T_pinstr _ | T_super_root | T_none) ->
+          obj # internal_init_other position new_dtd new_type;
+          obj
+      | _ ->
+          failwith "create_element: Cannot create such node"
+
+
+    method internal_init_other new_pos new_dtd new_ntype =
+      (* resets the contents of the object *)
+      parent <- None;
+      rev_nodes <- [];
+      nodes <- None;
+      (* size and array describe rev_nodes and must be reset with it *)
+      array <- None;
+      size <- 0;
+      ntype <- new_ntype;
+      position <- new_pos;
+      content_model <- Any;
+      content_dfa <- lazy None;
+      attributes <- [];
+      att_nodes <- [];
+      dtd <- Some new_dtd;
+      ext_decl <- false;
+      id_att_name <- None;
+      idref_att_names <- [];
+      comment <- None
+
+
+    method internal_init new_pos new_dtd new_name new_attlist =
+      (* ONLY FOR T_Element NODES!!! *)
+      (* resets the contents of the object *)
+      parent <- None;
+      rev_nodes <- [];
+      nodes <- None;
+      (* size and array describe rev_nodes and must be reset with it *)
+      array <- None;
+      size <- 0;
+      ntype <- T_element new_name;
+      position <- new_pos;
+      comment <- None;
+      att_nodes <- [];
+
+      let lexerset = Pxp_lexers.get_lexer_set (new_dtd # encoding) in
+      let sadecl = new_dtd # standalone_declaration in
+
+      (* First validate the element name and the attributes: *)
+      (* Well-Formedness Constraint: Unique Att Spec *)
+      let rec check_uniqueness al =
+        match al with
+          [] -> ()
+        | (n, av) :: al' ->
+            if List.mem_assoc n al' then
+              raise (WF_error("Attribute `" ^ n ^ "' occurs twice in element `" ^ new_name ^ "'"));
+            check_uniqueness al'
+      in
+      check_uniqueness new_attlist;
+      (* Validity Constraint: Element Valid [element has been declared] *)
+      try
+        let eltype = new_dtd # element new_name in
+        content_model <- eltype # content_model;
+        content_dfa <- lazy(eltype # content_dfa);
+        ext_decl <- eltype # externally_declared;
+        id_att_name <- eltype # id_attribute_name;
+        idref_att_names <- eltype # idref_attribute_names;
+        (* Validity Constraint: Attribute Value Type *)
+        (* Validity Constraint: Fixed Attribute Default *)
+        (* Validity Constraint: Standalone Document Declaration (partly) *)
+        let undeclared_attlist = ref [] in
+        let new_attlist' =
+          List.map
+            (fun (n,v) ->
+               try
+                 (* Get type, default, and the normalized attribute
+                  * value av:
+                  *)
+                 let atype, adefault = eltype # attribute n in
+                 let av = value_of_attribute lexerset new_dtd n atype v in
+                 (* If necessary, check whether normalization violates
+                  * the standalone declaration.
+                  *)
+                 if sadecl &&
+                    eltype #
+                      attribute_violates_standalone_declaration n (Some v)
+                 then
+                   raise
+                     (Validation_error
+                        ("Attribute `" ^ n ^ "' of element type `" ^
+                         new_name ^ "' violates standalone declaration"));
+                 (* If the default is "fixed", check that. *)
+                 begin match adefault with
+                   (D_required | D_implied) -> ()
+                 | D_default _ -> ()
+                 | D_fixed u ->
+                     let uv = value_of_attribute
+                                lexerset new_dtd "[default]" atype u in
+                     if av <> uv then
+                       raise
+                         (Validation_error
+                            ("Attribute `" ^ n ^
+                             "' is fixed, but has here a different value"));
+                 end;
+                 n,av
+               with
+                 Undeclared ->
+                   (* raised by method "# attribute" *)
+                   undeclared_attlist :=
+                     (n, value_of_attribute lexerset new_dtd n A_cdata v) ::
+                     !undeclared_attlist;
+                   n, Implied_value        (* does not matter *)
+            )
+            new_attlist in
+        (* Validity Constraint: Required Attribute *)
+        (* Validity Constraint: Standalone Document Declaration (partly) *)
+        (* Add attributes with default values *)
+        let new_attlist'' =
+          List.map
+            (fun n ->
+               try
+                 n, List.assoc n new_attlist'
+               with
+                 Not_found ->
+                   (* Check standalone declaration: *)
+                   if sadecl &&
+                      eltype #
+                        attribute_violates_standalone_declaration
+                        n None then
+                     raise
+                       (Validation_error
+                          ("Attribute `" ^ n ^ "' of element type `" ^
+                           new_name ^ "' violates standalone declaration"));
+                   (* add default value or Implied *)
+                   let atype, adefault = eltype # attribute n in
+                   match adefault with
+                     D_required ->
+                       raise(Validation_error("Required attribute `" ^ n ^ "' is missing"))
+                   | D_implied ->
+                       n, Implied_value
+                   | D_default v ->
+                       n, value_of_attribute lexerset new_dtd n atype v
+                   | D_fixed v ->
+                       n, value_of_attribute lexerset new_dtd n atype v
+            )
+            (eltype # attribute_names)
+        in
+        dtd <- Some new_dtd;
+        attributes <- new_attlist'' @ !undeclared_attlist
+      with
+        Undeclared ->
+          (* The DTD allows arbitrary attributes/contents for this
+           * element
+           *)
+          dtd <- Some new_dtd;
+          attributes <- List.map (fun (n,v) -> n, Value v) new_attlist;
+          content_model <- Any;
+          content_dfa <- lazy None
+
+    method local_validate ?(use_dfa=false) () =
+      (* validates that the content of this element matches the model *)
+      let dfa = if use_dfa then Lazy.force content_dfa else None in
+      if not (validate_content
+                ~use_dfa:dfa
+                content_model
+                (self : 'ext #node :> 'ext node)) then
+        raise(Validation_error(self # error_name ^
+                               " does not match its content model"))
+
+
+    method create_data _ _ =
+      failwith "method 'create_data' not applicable to element node"
+
+    method keep_always_whitespace_mode =
+      keep_always_whitespace <- true
+
+    (* Writes the start tag with all non-implied attributes (elements
+     * only), then the processing instructions, the sub nodes and
+     * finally the end tag (elements only).
+     *)
+    method write os enc =
+      let encoding = self # encoding in
+      let wms =
+        write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      begin match ntype with
+        T_element name ->
+          wms ("<" ^ name);
+          List.iter
+            (fun (aname, avalue) ->
+               match avalue with
+                 Implied_value -> ()
+               | Value v ->
+                   wms ("\n" ^ aname ^ "=\"");
+                   write_data_string ~from_enc:encoding ~to_enc:enc os v;
+                   wms "\""
+               | Valuelist l ->
+                   let v = String.concat " " l in
+                   wms ("\n" ^ aname ^ "=\"");
+                   write_data_string ~from_enc:encoding ~to_enc:enc os v;
+                   wms "\""
+            )
+            attributes;
+          wms "\n>"
+      | _ ->
+          ()
+      end;
+
+      Hashtbl.iter
+        (fun n pi ->
+           pi # write os enc
+        )
+        (Lazy.force pinstr);
+      List.iter
+        (fun n -> n # write os enc)
+        (self # sub_nodes);
+
+      begin match ntype with
+        T_element name ->
+          wms ("</" ^ name ^ "\n>")
+      | _ ->
+          ()
+      end
+
+    (* TODO: How to write comments? The comment string may contain
+     * illegal characters or "--".
+     *)
+
+
+    method write_compact_as_latin1 os =
+      self # write os `Enc_iso88591
+
+  end
+;;
+
+
+(* Returns the exemplar registered in tab.mapping for the element type
+ * eltype, or tab.default_element if nothing is registered.
+ *)
+let spec_table_find_exemplar tab eltype =
+  if Hashtbl.mem tab.mapping eltype then
+    Hashtbl.find tab.mapping eltype
+  else
+    tab.default_element
+;;
+
+
+(* Creates a data node for the string str from the data exemplar of
+ * spec.
+ *)
+let create_data_node spec dtd str =
+  let Spec_table tab = spec in
+  tab.data_node # create_data dtd str
+;;
+
+
+(* Creates an element node of type eltype with the attributes atts. The
+ * exemplar is the one spec provides for eltype (see
+ * spec_table_find_exemplar).
+ *)
+let create_element_node ?position spec dtd eltype atts =
+  let Spec_table tab = spec in
+  let exemplar = spec_table_find_exemplar tab eltype in
+  exemplar # create_element ?position:position dtd (T_element eltype) atts
+;;
+
+
+(* Creates a super root node from the super root exemplar of spec;
+ * fails if spec does not contain one.
+ *)
+let create_super_root_node ?position spec dtd =
+  let Spec_table tab = spec in
+  match tab.super_root_node with
+    Some exemplar ->
+      exemplar # create_element ?position:position dtd T_super_root []
+  | None ->
+      failwith "Pxp_document.create_super_root_node: No exemplar"
+;;
+
+(* Creates a node of type T_none from the default element exemplar of
+ * spec.
+ *)
+let create_no_node ?position spec dtd =
+  let Spec_table tab = spec in
+  tab.default_element # create_element ?position:position dtd T_none []
+;;
+
+
+(* Creates a comment node carrying text from the comment exemplar of
+ * spec; fails if spec does not contain one.
+ *)
+let create_comment_node ?position spec dtd text =
+  let Spec_table tab = spec in
+  match tab.comment_node with
+    Some exemplar ->
+      let node =
+        exemplar # create_element ?position:position dtd T_comment [] in
+      node # set_comment (Some text);
+      node
+  | None ->
+      failwith "Pxp_document.create_comment_node: No exemplar"
+;;
+
+
+(* Creates a wrapper node for the processing instruction pi. The
+ * exemplar registered for the target of pi is used if there is one,
+ * otherwise the default exemplar for processing instructions; the
+ * function fails if neither exists. The instruction itself is added to
+ * the new node.
+ *)
+let create_pinstr_node ?position spec dtd pi =
+  let target = pi # target in
+  let Spec_table tab = spec in
+  let exemplar =
+    try
+      Hashtbl.find tab.pinstr_mapping target
+    with
+      Not_found ->
+        ( match tab.default_pinstr_node with
+            Some fallback -> fallback
+          | None ->
+              failwith
+                "Pxp_document.create_pinstr_node: No exemplar"
+        )
+  in
+  let wrapper =
+    exemplar # create_element ?position:position dtd (T_pinstr target) [] in
+  wrapper # add_pinstr pi;
+  wrapper
+;;
+
+
+(* Returns the first node below base for which f is true. Without
+ * ~deeply only the direct sub nodes of base are inspected; with
+ * ~deeply:true the whole tree below base is searched, a node being
+ * tested before its own sub nodes. base itself is never tested.
+ * Raises Not_found if there is no such node.
+ *)
+let find ?(deeply=false) f base =
+  let rec first_flat = function
+      [] -> raise Not_found
+    | node :: rest ->
+        if f node then node else first_flat rest
+  in
+  let rec first_deep = function
+      [] -> raise Not_found
+    | node :: rest ->
+        if f node then
+          node
+        else
+          (* descend first, then go on with the following siblings *)
+          ( try first_deep (node # sub_nodes)
+            with Not_found -> first_deep rest
+          )
+  in
+  let search = if deeply then first_deep else first_flat in
+  search (base # sub_nodes)
+;;
+
+
+(* Returns all nodes below base for which f is true. Without ~deeply
+ * only the direct sub nodes of base are inspected; with ~deeply:true
+ * the whole tree below base is searched, and a matching node is listed
+ * before the matching nodes below it. base itself is never tested.
+ *)
+let find_all ?(deeply=false) f base =
+  let rec collect_flat = function
+      [] -> []
+    | node :: rest ->
+        if f node then node :: collect_flat rest else collect_flat rest
+  in
+  let rec collect_deep = function
+      [] -> []
+    | node :: rest ->
+        let below_and_after =
+          collect_deep (node # sub_nodes) @ collect_deep rest in
+        if f node then
+          node :: below_and_after
+        else
+          below_and_after
+  in
+  let collect = if deeply then collect_deep else collect_flat in
+  collect (base # sub_nodes)
+;;
+
+
+(* Like find, but searches for the first element node whose type name
+ * is eltype.
+ *)
+let find_element ?deeply eltype base =
+  let has_wanted_type node =
+    match node # node_type with
+      T_element name -> name = eltype
+    | _ -> false
+  in
+  find ?deeply:deeply has_wanted_type base
+;;
+
+
+(* Like find_all, but collects the element nodes whose type name is
+ * eltype.
+ *)
+let find_all_elements ?deeply eltype base =
+  let has_wanted_type node =
+    match node # node_type with
+      T_element name -> name = eltype
+    | _ -> false
+  in
+  find_all ?deeply:deeply has_wanted_type base
+;;
+
+
+(* Control exception for the tree traversal functions that follow: the
+ * traversal code catches Skip raised while a node is processed and
+ * leaves that node out.
+ * NOTE(review): add_node of element_impl, further up in this file,
+ * already raises and catches an exception named Skip, so an earlier
+ * definition presumably exists and is shadowed from here on - confirm.
+ *)
+exception Skip;;
+
+(* Maps the tree below base to a new tree. For every node, pre is
+ * applied first; unless the result is a data node, the sub nodes of the
+ * ORIGINAL node are mapped recursively and set as sub nodes of the
+ * result; finally post is applied. A node for which pre or post raises
+ * Skip is left out of the new tree. If this happens for base itself,
+ * Not_found is raised.
+ *)
+let map_tree ~pre ?(post=(fun x -> x)) base =
+  let rec transform node =
+    ( try
+        let node' = pre node in
+        if node' # node_type <> T_data then begin
+          let mapped = transform_all (node # sub_nodes) in
+          node' # set_nodes mapped
+        end;
+        post node'
+      with
+        Skip -> raise Not_found
+    )
+  and transform_all = function
+      [] -> []
+    | first :: others ->
+        ( try
+            let first' = transform first in
+            first' :: transform_all others
+          with
+            Not_found ->
+              (* first was skipped *)
+              transform_all others
+        )
+  in
+  transform base
+;;
+
+
+(* Variant of map_tree whose callbacks also get the left and the right
+ * neighbour of the processed node (None at the ends, and for base).
+ * For every node, pre is applied first; unless the result is a data
+ * node, the sub nodes of the ORIGINAL node are mapped, the resulting
+ * list is run through post, and the outcome is set as sub nodes of the
+ * result. pre sees the original siblings, post the siblings as produced
+ * by the pre pass. Nodes for which pre or post raises Skip are left
+ * out; if this happens for base itself, Not_found is raised.
+ *)
+let map_tree_sibl ~pre ?(post=(fun _ x _ -> x)) base =
+  let right_neighbour = function
+      [] -> None
+    | x :: _ -> Some x
+  in
+  let rec transform left node right =
+    ( try
+        let node' = pre left node right in
+        if node' # node_type <> T_data then begin
+          let mapped = transform_all None (node # sub_nodes) in
+          let finished = finish_all None mapped in
+          node' # set_nodes finished
+        end;
+        node'
+      with
+        Skip -> raise Not_found
+    )
+  and transform_all left = function
+      [] -> []
+    | first :: others ->
+        let right = right_neighbour others in
+        ( try
+            let first' = transform left first right in
+            first' :: transform_all (Some first) others
+          with
+            Not_found ->
+              transform_all (Some first) others
+        )
+  and finish_all left = function
+      [] -> []
+    | first :: others ->
+        let right = right_neighbour others in
+        ( try
+            let first' = post left first right in
+            first' :: finish_all (Some first) others
+          with
+            Skip ->
+              finish_all (Some first) others
+        )
+  in
+  let base' = transform None base None in
+  try post None base' None with Skip -> raise Not_found
+;;
+
+
+(* Walks over the tree below base (including base). For every node,
+ * pre is called before and post after the sub nodes have been visited.
+ * If one of the callbacks raises Skip, the rest of the processing of
+ * that node is abandoned (after a Skip in pre neither the sub nodes are
+ * visited nor is post called) and the walk goes on with the next
+ * sibling. A Skip for base itself surfaces as Not_found.
+ *
+ * The helper iter_children now returns unit. It used to return an empty
+ * list that was thrown away in a sequence, which the compiler flags as
+ * a non-unit statement. The siblings are processed by a tail call.
+ *)
+let iter_tree ?(pre=(fun x -> ())) ?(post=(fun x -> ())) base =
+  let rec iter_rec n =
+    ( try
+        pre n;
+        let children = n # sub_nodes in
+        iter_children children;
+        post n
+      with
+        Skip -> raise Not_found
+    )
+  and iter_children l =
+    match l with
+      [] -> ()
+    | child :: l' ->
+        (* Not_found means: child was skipped *)
+        ( try iter_rec child with Not_found -> () );
+        iter_children l'
+  in
+  iter_rec base
+;;
+
+
+(* Variant of iter_tree whose callbacks also get the left and the right
+ * sibling of the visited node (None at the ends of a sub node list, and
+ * for base). If one of the callbacks raises Skip, the rest of the
+ * processing of that node is abandoned and the walk goes on with the
+ * next sibling. A Skip for base itself surfaces as Not_found.
+ *
+ * The helper iter_children now returns unit. It used to return an empty
+ * list that was thrown away in a sequence, which the compiler flags as
+ * a non-unit statement. The siblings are processed by a tail call.
+ *)
+let iter_tree_sibl ?(pre=(fun _ _ _ -> ())) ?(post=(fun _ _ _ -> ())) base =
+  let rec iter_rec l n r =
+    ( try
+        pre l n r;
+        let children = n # sub_nodes in
+        iter_children None children;
+        post l n r
+      with
+        Skip -> raise Not_found
+    )
+  and iter_children predecessor l =
+    match l with
+      [] -> ()
+    | child :: l' ->
+        let successor =
+          match l' with
+            [] -> None
+          | x :: _ -> Some x in
+        (* Not_found means: child was skipped *)
+        ( try iter_rec predecessor child successor with Not_found -> () );
+        iter_children (Some child) l'
+  in
+  iter_rec None base None
+;;
+
+
+(* Compares two nodes by their node paths. The paths are compared
+ * component by component; the first differing component decides, and
+ * the result is the difference of the two components. If one path is a
+ * proper prefix of the other, the shorter one is the smaller (-1 resp.
+ * 1). Equal paths yield 0.
+ *)
+let compare a b =
+  let rec by_path = function
+      [], [] -> 0
+    | [], _ :: _ -> -1
+    | _ :: _, [] -> 1
+    | x :: xs, y :: ys ->
+        if x <> y then x - y else by_path (xs, ys)
+  in
+  let path_a = a # node_path in
+  let path_b = b # node_path in
+  by_path (path_a, path_b)
+;;
+
+
+(* An ordinal index: a hash table mapping nodes to integers. It is filled
+ * by create_ord_index and queried by ord_number and ord_compare.
+ *)
+type 'ext ord_index = ('ext node, int) Hashtbl.t;;
+
+(* create_ord_index base:
+ * Builds the table that maps every node visited by iter_tree (starting
+ * at [base]) to a running number. Numbers start at 0 and are assigned in
+ * the order in which the nodes are entered.
+ * The tree is walked twice: once to count the nodes (the count is the
+ * initial size of the table), and once to fill the table.
+ *)
+let create_ord_index base =
+  let node_count =
+    let counter = ref 0 in
+    iter_tree ~pre:(fun _ -> incr counter) base;
+    !counter
+  in
+  let index = Hashtbl.create node_count in
+  let next_ordinal = ref 0 in
+  let register node =
+    Hashtbl.add index node !next_ordinal;
+    incr next_ordinal
+  in
+  iter_tree ~pre:register base;
+  index
+;;
+
+
+(* ord_number idx node:
+ * Looks up the number stored for [node] in the index [idx].
+ * Raises Not_found if the index has no entry for [node].
+ *)
+let ord_number idx node =
+  let ordinal = Hashtbl.find idx node in
+  ordinal
+;;
+
+(* ord_compare idx a b:
+ * Compares the nodes a and b by the numbers stored for them in the
+ * index [idx].
+ * Returns exactly -1, 0, or 1, as promised by the interface; earlier
+ * versions returned the difference of the two numbers.
+ * Raises Not_found if one of the nodes has no entry in the index.
+ *)
+let ord_compare idx a b =
+  let ord_a : int = Hashtbl.find idx a in
+  let ord_b : int = Hashtbl.find idx b in
+  if ord_a = ord_b then 0
+  else if ord_a < ord_b then -1
+  else 1
+;;
+
+(* class document:
+ * A document stores the XML version string, the DTD, the root node, and
+ * the processing instructions that are attached to the document itself.
+ * The argument the_warner is the object receiving warnings.
+ *
+ * A fresh object has neither DTD nor root; both are set by init_root.
+ * Until then, the methods dtd, encoding, root (and thus write) fail.
+ *)
+class ['ext] document the_warner =
+  object (self)
+    val mutable xml_version = "1.0"
+    val mutable dtd = (None : dtd option)
+    val mutable root = (None : 'ext node option)
+
+    (* The table of processing instructions is only created on demand: *)
+    val pinstr = lazy (Hashtbl.create 10 : (string,proc_instruction) Hashtbl.t)
+    val warner = (the_warner : collect_warnings)
+
+    (* Stores the version string s. A warning is emitted if s is not
+     * "1.0", but the string is stored nevertheless.
+     *)
+    method init_xml_version s =
+      if s <> "1.0" then
+        warner # warn ("XML version '" ^ s ^ "' not supported");
+      xml_version <- s
+
+    (* Sets the root node r, and takes the DTD of r as the DTD of the
+     * document. Unless the DTD allows arbitrary contents, and provided
+     * that the DTD declares a root element, it is checked that the
+     * root element has the declared name; a Validation_error is raised
+     * if not.
+     * Fails if r is neither an element nor a super root node, or if
+     * the root element has to be checked but the super root node does
+     * not contain any element.
+     *)
+    method init_root r =
+      let dtd_r = r # dtd in
+      match r # node_type with
+
+      (**************** CASE: We have a super root element ***************)
+
+      | T_super_root ->
+          if not (dtd_r # arbitrary_allowed) then begin
+            match dtd_r # root with
+              Some declared_root_element_name ->
+                (* The real root is the first element among the
+                 * children of the super root node:
+                 *)
+                let real_root_element =
+                  try
+                    List.find
+                      (fun r' ->
+                         match r' # node_type with
+                         | T_element _ -> true
+                         | _ -> false)
+                      (r # sub_nodes)
+                  with
+                    Not_found ->
+                      failwith "Pxp_document.document#init_root: Super root does not contain root element"
+                      (* TODO: Check also that there is at most one
+                       * element in the super root node
+                       *)
+
+                in
+                let real_root_element_name =
+                  match real_root_element # node_type with
+                    T_element name -> name
+                  | _ -> assert false
+                in
+                if real_root_element_name <> declared_root_element_name then
+                  raise
+                    (Validation_error ("The root element is `" ^
+                                       real_root_element_name ^
+                                       "' but is declared as `" ^
+                                       declared_root_element_name ^
+                                       "'"))
+            | None -> ()
+          end;
+          (* All is okay, so store dtd and root node: *)
+          dtd <- Some dtd_r;
+          root <- Some r
+
+      (**************** CASE: No super root element **********************)
+
+      | T_element root_element_name ->
+          if not (dtd_r # arbitrary_allowed) then begin
+            match dtd_r # root with
+              Some declared_root_element_name ->
+                if root_element_name <> declared_root_element_name then
+                  raise
+                    (Validation_error ("The root element is `" ^
+                                       root_element_name ^
+                                       "' but is declared as `" ^
+                                       declared_root_element_name ^
+                                       "'"))
+            | None ->
+                (* This may happen if you initialize your DTD yourself.
+                 * The value 'None' means that the method 'set_root' was
+                 * never called for the DTD; we interpret it here as:
+                 * The root element does not matter.
+                 *)
+                ()
+          end;
+          (* All is okay, so store dtd and root node: *)
+          dtd <- Some dtd_r;
+          root <- Some r
+
+      | _ ->
+          failwith "Pxp_document.document#init_root: the root node must be an element or super-root"
+
+    (* Returns the stored version string (initially "1.0") *)
+    method xml_version = xml_version
+
+    (* Returns the standalone declaration of the DTD, or false if there
+     * is no DTD yet.
+     *)
+    method xml_standalone =
+      match dtd with
+        None -> false
+      | Some d -> d # standalone_declaration
+
+    (* Returns the DTD; fails if init_root has not yet set it *)
+    method dtd =
+      match dtd with
+        None -> failwith "Pxp_document.document#dtd: Document has no DTD"
+      | Some d -> d
+
+    (* Returns the encoding of the DTD; fails if there is no DTD *)
+    method encoding =
+      match dtd with
+        None -> failwith "Pxp_document.document#encoding: Document has no DTD"
+      | Some d -> d # encoding
+
+    (* Returns the root node; fails if init_root has not yet set it *)
+    method root =
+      match root with
+        None -> failwith "Pxp_document.document#root: Document has no root element"
+      | Some r -> r
+
+    (* Adds the processing instruction pi under its target name. If the
+     * document already has a DTD, pi must have the same encoding as the
+     * DTD; otherwise the method fails. Several instructions may be
+     * stored for the same target.
+     *)
+    method add_pinstr pi =
+      begin match dtd with
+        None -> ()
+      | Some d ->
+          if pi # encoding <> d # encoding then
+            failwith "Pxp_document.document # add_pinstr: Inconsistent encodings";
+      end;
+      let name = pi # target in
+      Hashtbl.add (Lazy.force pinstr) name pi
+
+    (* Returns all processing instructions stored for the target name *)
+    method pinstr name =
+      Hashtbl.find_all (Lazy.force pinstr) name
+
+    (* Returns the target names of the stored processing instructions.
+     * There is one list element per stored instruction, so a target
+     * occurs several times if several instructions share it.
+     *)
+    method pinstr_names =
+      let l = ref [] in
+      Hashtbl.iter
+        (fun n _ -> l := n :: !l)
+        (Lazy.force pinstr);
+      !l
+
+    (* Writes the document to the output stream os, using the encoding
+     * enc: first the XML declaration (always with version 1.0), then
+     * the DTD, then the processing instructions of the document, and
+     * finally the root node followed by a newline.
+     * Fails if the document has no DTD or no root.
+     *)
+    method write os enc =
+      let encoding = self # encoding in
+      let wms =
+        write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+      let r = self # root in
+      wms ("<?xml version='1.0' encoding='" ^
+           Netconversion.string_of_encoding enc ^
+           "'?>\n");
+      (* The last argument of the write method of the DTD tells whether
+       * the DTD declares a root element:
+       *)
+      ( match self # dtd # root with
+          None ->
+            self # dtd # write os enc false
+        | Some _ ->
+            self # dtd # write os enc true
+      );
+      Hashtbl.iter
+        (fun n pi ->
+           pi # write os enc
+        )
+        (Lazy.force pinstr);
+      r # write os enc;
+      wms "\n";
+
+    (* Same as write with the encoding `Enc_iso88591 *)
+    method write_compact_as_latin1 os =
+      self # write os `Enc_iso88591
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.14 2000/08/30 15:47:52 gerd
+ * Implementation of pxp_document.mli rev 1.10.
+ *
+ * Revision 1.13 2000/08/26 23:29:10 gerd
+ * Implementations for the changed in rev 1.9 of pxp_document.mli.
+ *
+ * Revision 1.12 2000/08/18 20:14:00 gerd
+ * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.11 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.9 2000/07/16 19:37:09 gerd
+ * Simplification.
+ *
+ * Revision 1.8 2000/07/16 17:50:01 gerd
+ * Fixes in 'write'
+ *
+ * Revision 1.7 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.6 2000/07/14 13:56:11 gerd
+ * Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.5 2000/07/09 17:51:14 gerd
+ * Element nodes can store positions.
+ *
+ * Revision 1.4 2000/07/08 23:04:06 gerd
+ * [Merging 0.2.10:] Bugfix: allow_undeclared_attribute
+ *
+ * Revision 1.3 2000/07/04 22:10:06 gerd
+ * Implemented rev 1.3 of pxp_document.mli in a straight-
+ * forward fashion.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.ml:
+ *
+ * Revision 1.19 2000/05/27 19:14:42 gerd
+ * value_of_attribute: this function has been moved to
+ * markup_aux.ml.
+ *
+ * Added the following checks whether there is a violation
+ * against the standalone declaration:
+ * - Externally declared elements with regexp content model
+ * must not contain extra white space
+ * - The effect of normalization of externally declared attributes
+ * must not depend on the type of the attributes
+ * - Declared default values of externally declared attributes
+ * must not have an effect on the value of the attributes.
+ *
+ * Removed the method init_xml_standalone. It is now stored in
+ * the DTD whether there is a standalone declaration.
+ *
+ * Revision 1.18 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.17 2000/05/06 23:12:20 gerd
+ * Allow undeclared attributes.
+ *
+ * Revision 1.16 2000/05/01 20:42:28 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.15 2000/04/30 18:15:22 gerd
+ * In function validate_content: Special handling of the pseudo
+ * nodes "-pi" and "-vr".
+ * Method init_root, class document: Recognizes whether the
+ * root is virtual or real. The check on the root element name is different
+ * in each case.
+ * New method keep_always_whitespace_mode: Turns a special mode
+ * on in which ignorable whitespace is included into the document.
+ *
+ * Revision 1.14 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.13 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.12 2000/01/27 21:19:34 gerd
+ * Added methods.
+ * Bugfix: 'orphaned_clone' performs now really a clone.
+ *
+ * Revision 1.11 2000/01/20 21:57:58 gerd
+ * Bugfix: method set_nodes does no longer add the new subnodes
+ * in the reverse order.
+ *
+ * Revision 1.10 1999/12/17 21:35:37 gerd
+ * Bugfix: If the name of the root element is not specified in
+ * the DTD, the document does not check whether the root element is a
+ * specific element.
+ *
+ * Revision 1.9 1999/11/09 22:22:01 gerd
+ * The "document" classes now checks that the root element is the
+ * same as the declared root element. Thanks to Claudio Sacerdoti Coen
+ * for his bug report.
+ *
+ * Revision 1.8 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.7 1999/09/01 16:19:18 gerd
+ * Added some warnings.
+ * If an element type has the content model EMPTY, it is now strictly
+ * checked that the element instance is really empty. Especially, white space
+ * is NOT allowed in such instances.
+ *
+ * Revision 1.6 1999/08/19 21:58:59 gerd
+ * Added method "reset_finder". This is not very convincing, but
+ * currently the simplest way to update the ID hash table.
+ *
+ * Revision 1.5 1999/08/19 01:08:15 gerd
+ * Added method "find" that searches node by ID in the whole
+ * tree.
+ * Bugfix: After the extension has been cloned, the "set_node" method
+ * is invoked telling the clone to which node it is associated.
+ *
+ * Revision 1.4 1999/08/15 13:52:52 gerd
+ * Bugfix: WF_error "Attribute x occurs twice in element [unnamed]"
+ * no longer possible; instead of "[unnamed]" the actual name is printed.
+ * Improved some of the error messages.
+ *
+ * Revision 1.3 1999/08/15 02:19:01 gerd
+ * If the DTD allows arbitrary elements, unknown elements are not
+ * rejected.
+ *
+ * Revision 1.2 1999/08/11 14:54:23 gerd
+ * Optimizations: The hashtable for the 'pinstr' variable is only
+ * created on demand. -- The 'only_whitespace' function uses a simple "for"
+ * loop is the string is small and a lexer if the string is big.
+ *
+ * Revision 1.1 1999/08/10 00:35:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(**********************************************************************)
+(* *)
+(* Pxp_document: *)
+(* Object model of the document/element instances *)
+(* *)
+(**********************************************************************)
+
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class type node ............. The common class type of the nodes of
+ * the element tree. Nodes are either
+ * elements (inner nodes) or data nodes
+ * (leaves)
+ * class type extension ........ The minimal properties of the so-called
+ * extensions of the nodes: Nodes can be
+ * customized by applying a class parameter
+ * that adds methods/values to nodes.
+ * class data_impl : node ...... Implements data nodes.
+ * class element_impl : node ... Implements element nodes
+ * class document .............. A document is an element with some additional
+ * properties
+ *
+ * ======================================================================
+ *
+ * THE STRUCTURE OF NODE TREES:
+ *
+ * Every node except the root node has a parent node. The parent node is
+ * always an element, because data nodes never contain other nodes.
+ * In the other direction, element nodes may have children; both elements
+ * and data nodes are possible as children.
+ * Every node knows its parent (if any) and all its children (if any);
+ * the linkage is maintained in both directions. A node without a parent
+ * is called a root.
+ * It is not possible that a node is the child of two nodes (two different nodes
+ * or a multiple child of the same node).
+ * You can break the connection between a node and its parent; the method
+ * "delete" performs this operation and deletes the node from the parent's
+ * list of children. The node is now a root, for itself and for all
+ * subordinate nodes. In this context, the node is also called an orphan,
+ * because it has lost its parent (this is a bit misleading because the
+ * parent is not always the creator of a node).
+ * In order to simplify complex operations, you can also set the list of
+ * children of an element. Nodes that have been children before are unchanged;
+ * new nodes are added (and the linkage is set up), nodes no longer occurring
+ * in the list are handled as if they had been deleted.
+ * If you try to add a node that is not a root (either by an "add" or by a
+ * "set" operation) the operation fails.
+ *
+ * CREATION OF NODES
+ *
+ * The class interface supports creation of nodes by cloning a so-called
+ * exemplar. The idea is that it is sometimes useful to implement different
+ * element types by different classes, and to implement this by looking up
+ * exemplars.
+ * Imagine you have three element types A, B, and C, and three classes
+ * a, b, and c implementing the node interface (for example, by providing
+ * different extensions, see below). The XML parser can be configured to
+ * have a lookup table
+ * { A --> a0, B --> b0, C --> c0 }
+ * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty
+ * objects belonging to these classes. If the parser finds an instance of
+ * A, it looks up the exemplar a0 of A and clones it (actually, the method
+ * "create_element" performs this for elements, and "create_data" for data
+ * nodes). Clones belong to the same class as the original nodes, so the
+ * instances of the elements have the same classes as the configured
+ * exemplars.
+ * Note: This technique assumes that the interface of all exemplars is the
+ * same!
+ *
+ * THE EXTENSION
+ *
+ * The class type node and all its implementations have a class parameter
+ * 'ext which must at least fulfil the properties of the class type "extension".
+ * The idea is that you can add properties, for example:
+ *
+ * class my_extension =
+ * object
+ * (* minimal properties required by class type "extension": *)
+ * method clone = ...
+ * method node = ...
+ * method set_node n = ...
+ * (* here my own methods: *)
+ * method do_this_and_that ...
+ * end
+ *
+ * class my_element_impl = [ my_extension ] element_impl
+ * class my_data_impl = [ my_extension ] data_impl
+ *
+ * The whole XML parser is parameterized with 'ext, so your extension is
+ * visible everywhere (this is the reason why extensibility is solved by
+ * parametric polymorphism and not by inclusive polymorphism (subtyping)).
+ *
+ *
+ * SOME COMPLICATED TYPE EXPRESSIONS
+ *
+ * Sometimes the following type expressions turn out to be necessary:
+ *
+ * 'a node extension as 'a
+ * This is the type of an extension that belongs to a node that
+ * has an extension that is the same as we started with.
+ *
+ * 'a extension node as 'a
+ * This is the type of a node that has an extension that belongs to a
+ * node of the type we started with.
+ *
+ *
+ * DOCUMENTS
+ * ...
+ *
+ * ======================================================================
+ *
+ * SIMPLE USAGE: ...
+ *)
+
+
+open Pxp_dtd
+
+
+(* The type of a node, as returned by the method node_type of the class
+ * type node. The constructors fall into three groups, see below.
+ *)
+type node_type =
+ (* The basic and most important node types:
+ * - T_element element_type is the type of element nodes
+ * - T_data is the type of text data nodes
+ * By design of the parser, neither CDATA sections nor entity references
+ * are represented in the node tree; so there are no types for them.
+ *)
+ T_element of string
+ | T_data
+
+ (* The following types are extensions to my original design. They have mainly
+ * been added to simplify the implementation of standards (such as
+ * XPath) that require that nodes of these types are included into the
+ * main document tree.
+ * There are options (see Pxp_yacc) forcing the parser to insert such
+ * nodes; in this case, the nodes are actually element nodes serving
+ * as wrappers for the additional data structures. The options are:
+ * enable_super_root_node, enable_pinstr_nodes, enable_comment_nodes.
+ * By default, such nodes are not created.
+ *)
+ | T_super_root
+ | T_pinstr of string (* The string is the target of the PI *)
+ | T_comment
+
+ (* The following types are fully virtual. This means that it is impossible
+ * to make the parser insert such nodes. However, these types might be
+ * practical when defining views on the tree.
+ * Note that the list of virtual node types will be extended if necessary.
+ *)
+ | T_none
+ | T_attribute of string (* The string is the name of the attribute *)
+ | T_namespace of string (* The string is the namespace prefix *)
+;;
+
+
+(* The minimal interface every extension object must provide. The class
+ * parameter 'node is the type of the node the extension belongs to.
+ *)
+class type [ 'node ] extension =
+ object ('self)
+ method clone : 'self
+ (* "clone" should return an exact deep copy of the object. *)
+ method node : 'node
+ (* "node" returns the corresponding node of this extension. This method
+ * is intended to return exactly what previously has been set by
+ * "set_node".
+ *)
+ method set_node : 'node -> unit
+ (* "set_node" is invoked once the extension is associated to a new
+ * node object.
+ *)
+ end
+;;
+
+
+class type [ 'ext ] node =
+ object ('self)
+ constraint 'ext = 'ext node #extension
+
+ method extension : 'ext
+ (* Return the extension of this node: *)
+
+ method delete : unit
+ (* Delete this node from the parent's list of sub nodes. This node gets
+ * orphaned.
+ * 'delete' does nothing if this node does not have a parent.
+ *)
+
+ method parent : 'ext node
+ (* Get the parent, or raise Not_found if this node is an orphan. *)
+
+ method root : 'ext node
+ (* Get the direct or indirect parent that does not have a parent itself,
+ * i.e. the root of the tree.
+ *)
+
+ method orphaned_clone : 'self
+ (* return an exact clone of this element and all sub nodes (deep copy)
+ * except string values which are shared by this node and the clone.
+ * The other exception is that the clone has no parent (i.e. it is now
+ * a root).
+ *)
+
+ method orphaned_flat_clone : 'self
+ (* return a clone of this element where all subnodes are omitted.
+ * The type of the node, and the attributes are the same as in the
+ * original node.
+ * The clone has no parent.
+ *)
+
+ method add_node : ?force:bool -> 'ext node -> unit
+ (* Append new sub nodes -- mainly used by the parser itself, but
+ * of course open for everybody. If an element is added, it must be
+ * an orphan (i.e. does not have a parent node); and after addition
+ * *this* node is the new parent.
+ * The method performs some basic validation checks if the current node
+ * has a regular expression as content model, or is EMPTY. You can
+ * turn these checks off by passing ~force:true to the method.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Add a processing instruction to the set of processing instructions of
+ * this node. Usually only elements contain processing instructions.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Get all processing instructions with the passed name *)
+
+ method pinstr_names : string list
+ (* Get a list of all names of processing instructions *)
+
+ method node_position : int
+ (* Returns the position of this node among all children of the parent
+ * node. Positions are counted from 0.
+ * Raises Not_found if the node is the root node.
+ *)
+
+ method node_path : int list
+ (* Returns the list of node positions of the ancestors of this node,
+ * including this node. The first list element is the node position
+ * of this child of the root, and the last list element is the
+ * node position of this node.
+ * Returns [] if the node is the root node.
+ *)
+
+ method sub_nodes : 'ext node list
+ (* Get the list of sub nodes *)
+
+ method iter_nodes : ('ext node -> unit) -> unit
+ (* iterate over the sub nodes *)
+
+ method iter_nodes_sibl :
+ ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+ (* Here every iteration step can also access the previous and the
+ * following node if present.
+ *)
+
+ method nth_node : int -> 'ext node
+ (* Returns the n-th sub node of this node, n >= 0. Raises Not_found
+ * if the index is out of the valid range.
+ * Note that the first invocation of this method requires additional
+ * overhead.
+ *)
+
+ method previous_node : 'ext node
+ method next_node : 'ext node
+ (* Return the previous and next nodes, respectively. These methods are
+ * equivalent to
+ * - parent # nth_node (self # node_position - 1) and
+ * - parent # nth_node (self # node_position + 1), respectively.
+ *)
+
+ method set_nodes : 'ext node list -> unit
+ (* Set the list of sub nodes. Elements that are no longer sub nodes get
+ * orphaned, and all new elements that previously were not sub nodes
+ * must have been orphaned.
+ *)
+
+ method data : string
+ (* Get the data string of this node. For data nodes, this string is just
+ * the content. For elements, this string is the concatenation of all
+ * subordinate data nodes.
+ *)
+
+ method node_type : node_type
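+ (* Get the type of this node (a value of the type node_type). For
+ * element nodes this is T_element name, where name is the name of the
+ * element type.
+ *)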
+
+ method position : (string * int * int)
+ (* Return the name of the entity, the line number, and the column
+ * position (byte offset) of the beginning of the element.
+ * Only available if the element has been created with position
+ * information.
+ * Returns "?",0,0 if not available. (Note: Line number 0 is not
+ * possible otherwise.)
+ *)
+
+ method attribute : string -> Pxp_types.att_value
+ method attribute_names : string list
+ method attribute_type : string -> Pxp_types.att_type
+ method attributes : (string * Pxp_types.att_value) list
+ (* Get a specific attribute; get the names of all attributes; get the
+ * type of a specific attribute; get names and values of all attributes.
+ * Only elements have attributes.
+ * Note: If the DTD allows arbitrary attributes for this element,
+ * "attribute_type" raises Undeclared.
+ *)
+
+ method required_string_attribute : string -> string
+ method required_list_attribute : string -> string list
+ (* Return the attribute or fail if the attribute is not present:
+ * The first version passes the value always as string back;
+ * the second version always as list.
+ *)
+
+ method optional_string_attribute : string -> string option
+ method optional_list_attribute : string -> string list
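+ (* Return some attribute value, or indicate that the attribute is not
+ * present:
+ * The first version passes the value always as string back, and returns
+ * None if the attribute is not present;
+ * the second version always as list. (Its result type is string list, so
+ * it cannot return None; presumably a missing attribute yields the empty
+ * list -- TODO: confirm against the implementation.)
+ *)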
+
+ method id_attribute_name : string
+ method id_attribute_value : string
+ (* Return the name and value of the ID attribute. The methods may
+ * raise Not_found if there is no ID attribute in the DTD, or no
+ * ID attribute in the element, respectively.
+ *)
+
+ method idref_attribute_names : string list
+ (* Returns the list of attribute names of IDREF or IDREFS type. *)
+
+ method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+ (* Sets the attributes but does not check whether they match the DTD.
+ *)
+
+ method attributes_as_nodes : 'ext node list
+ (* Experimental feature: Return the attributes as node list. Every node
+ * has type T_attribute n, and contains only the single attribute n.
+ * This node list is computed on demand, so the first invocation of this
+ * method will create the list, and following invocations will only
+ * return the existing list.
+ *)
+
+ method set_comment : string option -> unit
+ (* Sets the comment string; only applicable for T_comment nodes *)
+
+ method comment : string option
+ (* Get the comment string.
+ * Returns always None for nodes with a type other than T_comment.
+ *)
+
+ method dtd : dtd
+ (* Get the DTD. Fails if no DTD is specified (which is impossible if
+ * 'create_element' or 'create_data' have been used to create this
+ * object)
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Get the encoding which is always the same as the encoding of the
+ * DTD. See also method 'dtd' (Note: This method fails, too, if
+ * no DTD is present.)
+ *)
+
+ method create_element :
+ ?position:(string * int * int) ->
+ dtd -> node_type -> (string * string) list -> 'ext node
+ (* create an "empty copy" of this element:
+ * - new DTD
+ * - new node type (which must not be T_data)
+ * - new attribute list
+ * - empty list of nodes
+ *)
+
+ method create_data : dtd -> string -> 'ext node
+ (* create an "empty copy" of this data node: *)
+
+ method local_validate :
+ ?use_dfa:bool ->
+ unit -> unit
+ (* Check that this element conforms to the DTD.
+ * Option ~use_dfa: If true, the deterministic finite automaton of
+ * regexp content models is used for validation, if available.
+ * Defaults to false.
+ *)
+
+ method keep_always_whitespace_mode : unit
+ (* Normally, add_node does not accept data nodes when the DTD does not
+ * allow data nodes or only whitespace ("ignorable whitespace").
+ * Once you have invoked this method, ignorable whitespace is forced
+ * to be included into the document.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* Write the contents of this node and the subtrees to the passed
+ * output stream; the passed encoding is used. The format
+ * is compact (the opposite of "pretty printing").
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser
+ *)
+
+
+ (* ---------------------------------------- *)
+ (* The methods 'find' and 'reset_finder' are no longer supported.
+ * The functionality is provided by the configurable index object
+ * (see Pxp_yacc).
+ *)
+
+
+ (* ---------------------------------------- *)
+ (* internal methods: *)
+ method internal_adopt : 'ext node option -> int -> unit
+ method internal_set_pos : int -> unit
+ method internal_delete : 'ext node -> unit
+ method internal_init : (string * int * int) ->
+ dtd -> string -> (string * string) list -> unit
+ method internal_init_other : (string * int * int) ->
+ dtd -> node_type -> unit
+ end
+;;
+
+
+class [ 'ext ] data_impl : 'ext -> [ 'ext ] node
+ (* Creation:
+ * new data_impl an_extension
+ * creates a new data node with the given extension and the empty string
+ * as content.
+ *)
+;;
+
+
+class [ 'ext ] element_impl : 'ext -> [ 'ext ] node
+ (* Creation:
+ * new element_impl an_extension
+ * creates a new empty element node with the given extension.
+ *)
+;;
+
+
+(* Attribute and namespace nodes are experimental: *)
+
+class [ 'ext ] attribute_impl :
+ element:string -> name:string -> Pxp_types.att_value -> dtd -> [ 'ext ] node
+
+ (* Creation:
+ * new attribute_impl
+ * ~element:element_name ~name:attribute_name attribute_value dtd
+ * (the first two arguments are labelled, see the signature above).
+ * Note that attribute nodes intentionally do not have extensions.
+ *)
+
+(* Once namespaces get implemented:
+class [ 'ext ] namespace_impl :
+ prefix:string -> name:string -> dtd -> [ 'ext ] node
+*)
+
+(********************************** spec *********************************)
+
+type 'ext spec
+constraint 'ext = 'ext node #extension
+ (* Contains the exemplars used for the creation of new nodes
+ *)
+
+
+val make_spec_from_mapping :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_mapping: (string, 'ext node) Hashtbl.t ->
+ unit ->
+ 'ext spec
+ (* Specifies:
+ * - For new data nodes, the ~data_exemplar must be used
+ * - For new element nodes: If the element type is mentioned in the
+ * ~element_mapping hash table, the exemplar found in this table is
+ * used. Otherwise, the ~default_element_exemplar is used.
+ * Optionally:
+ * - You may also specify exemplars for super root nodes, for comments
+ * and for processing instructions
+ *)
+
+val make_spec_from_alist :
+ ?super_root_exemplar : 'ext node ->
+ ?comment_exemplar : 'ext node ->
+ ?default_pinstr_exemplar : 'ext node ->
+ ?pinstr_alist : (string * 'ext node) list ->
+ data_exemplar: 'ext node ->
+ default_element_exemplar: 'ext node ->
+ element_alist: (string * 'ext node) list ->
+ unit ->
+ 'ext spec
+ (* This is a convenience function: You can pass the mappings from
+ * elements and PIs to exemplar by associative lists.
+ *)
+
+val create_data_node :
+ 'ext spec -> dtd -> string -> 'ext node
+val create_element_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> string -> (string * string) list -> 'ext node
+val create_super_root_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> 'ext node
+val create_comment_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> string -> 'ext node
+val create_pinstr_node :
+ ?position:(string * int * int) ->
+ 'ext spec -> dtd -> proc_instruction -> 'ext node
+ (* These functions use the exemplars contained in a spec and create fresh
+ * node objects from them.
+ *)
+
+val create_no_node :
+ ?position:(string * int * int) -> 'ext spec -> dtd -> 'ext node
+ (* Creates a T_none node with limited functionality *)
+
+(*********************** Ordering of nodes ******************************)
+
+val compare : 'ext node -> 'ext node -> int
+ (* Returns -1 if the first node is before the second node, or +1 if the
+ * first node is after the second node, or 0 if both nodes are identical.
+ * If the nodes are unrelated (do not have a common ancestor), the result
+ * is undefined.
+ * This test is rather slow.
+ *)
+
+type 'ext ord_index
+constraint 'ext = 'ext node #extension
+ (* The type of ordinal indexes *)
+
+val create_ord_index : 'ext node -> 'ext ord_index
+ (* Creates an ordinal index for the subtree starting at the passed node.
+ * This index assigns to every node an ordinal number (beginning with 0) such
+ * that nodes are numbered upon the order of the first character in the XML
+ * representation (document order).
+ * Note that the index is not automatically updated when the tree is
+ * modified.
+ *)
+
+val ord_number : 'ext ord_index -> 'ext node -> int
+ (* Returns the ordinal number of the node, or raises Not_found *)
+
+val ord_compare : 'ext ord_index -> 'ext node -> 'ext node -> int
+ (* Compares two nodes like 'compare':
+ * Returns -1 if the first node is before the second node, or +1 if the
+ * first node is after the second node, or 0 if both nodes are identical.
+ * If one of the nodes does not occur in the ordinal index, Not_found
+ * is raised.
+ * This test is much faster than 'compare'.
+ *)
+
+
+(***************************** Iterators ********************************)
+
+val find : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node
+ (* Searches for the first node for which the predicate f is true, and
+ * returns it. Raises Not_found if there is no such node.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_all : ?deeply:bool ->
+ f:('ext node -> bool) -> 'ext node -> 'ext node list
+ (* Searches for all nodes for which the predicate f is true, and returns
+ * them as a list.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_element : ?deeply:bool ->
+ string -> 'ext node -> 'ext node
+ (* Searches for the first element with the passed element type.
+ * NOTE(review): presumably raises Not_found like 'find' if there is no
+ * such element; confirm against the implementation.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+val find_all_elements : ?deeply:bool ->
+ string -> 'ext node -> 'ext node list
+ (* Searches for all elements with the passed element type, and returns
+ * them as a list.
+ * By default, ~deeply=false. In this case, only the children of the
+ * passed node are searched.
+ * If passing ~deeply=true, the children are searched recursively
+ * (depth-first search).
+ *)
+
+exception Skip
+ (* May be raised by the ~pre and ~post functions of map_tree and
+ * map_tree_sibl in order to leave out the current node.
+ *)
+val map_tree : pre:('exta node -> 'extb node) ->
+ ?post:('extb node -> 'extb node) ->
+ 'exta node ->
+ 'extb node
+ (* Traverses the passed node and all children recursively. After entering
+ * a node, the function ~pre is called. The result of this function must
+ * be a new node; it must have neither children nor a parent (you can
+ * simply pass (fun n -> n # orphaned_flat_clone) as ~pre).
+ * After that, the children are processed in the same way (from left to
+ * right); the results of the transformation are added to the
+ * new node as new children.
+ * Finally, the ~post function is invoked with this node as argument, and
+ * its result is the result of map_tree for this node (~post should
+ * return a root node, too; if not specified, ~post is the identity).
+ * Both ~pre and ~post may raise Skip; in this case the node is left out
+ * of the result. If the top node is skipped, the exception Not_found is
+ * raised.
+ *)
+
+val map_tree_sibl :
+ pre: ('exta node option -> 'exta node -> 'exta node option ->
+ 'extb node) ->
+ ?post:('extb node option -> 'extb node -> 'extb node option ->
+ 'extb node) ->
+ 'exta node ->
+ 'extb node
+ (* Works like map_tree, but the functions ~pre and ~post have additional
+ * arguments:
+ * - ~pre l n r: The node n is the node to map, l is the previous
+ * node, and r is the next node (each is None if not present). l and r
+ * are both nodes before the transformation.
+ * - ~post l n r: The node n is the node which is the result of ~pre
+ * plus adding children. l and r are again the previous and the next
+ * node, respectively, but after being transformed.
+ *)
+
+val iter_tree : ?pre:('ext node -> unit) ->
+ ?post:('ext node -> unit) ->
+ 'ext node ->
+ unit
+ (* The counterpart of map_tree that only iterates over the nodes instead
+ * of mapping them: ~pre and ~post are called for their side effects,
+ * and no new tree is built.
+ *)
+
+val iter_tree_sibl :
+ ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+ 'ext node ->
+ unit
+ (* The counterpart of map_tree_sibl that only iterates over the nodes
+ * instead of mapping them. As for map_tree_sibl, ~pre and ~post get the
+ * previous and the next node as additional arguments.
+ *)
+
+
+(******************************* document ********************************)
+
+
+class [ 'ext ] document :
+ Pxp_types.collect_warnings ->
+ object
+ (* Documents: These are containers for root elements and for DTDs.
+ *
+ * Important invariant: A document is either empty (no root element,
+ * no DTD), or it has both a root element and a DTD.
+ *
+ * A fresh document created by 'new' is empty.
+ *)
+
+ method init_xml_version : string -> unit
+ (* Set the XML version string of the XML declaration. *)
+
+ method init_root : 'ext node -> unit
+ (* Set the root element. It is expected that the root element has
+ * a DTD.
+ * Note that 'init_root' checks whether the passed root element
+ * has the type expected by the DTD. The check takes into account
+ * that the root element might be a virtual root node.
+ *)
+
+ method xml_version : string
+ (* Returns the XML version from the XML declaration. Returns "1.0"
+ * if the declaration is missing.
+ *)
+
+ method xml_standalone : bool
+ (* Returns whether this document is declared as being standalone.
+ * This method returns the same value as 'standalone_declaration'
+ * of the DTD (if there is a DTD).
+ * Returns 'false' if there is no DTD.
+ *)
+
+ method dtd : dtd
+ (* Returns the DTD of the root element.
+ * Fails if there is no root element.
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Returns the string encoding of the document = the encoding of
+ * the root element = the encoding of the element tree = the
+ * encoding of the DTD.
+ * Fails if there is no root element.
+ *)
+
+ method root : 'ext node
+ (* Returns the root element, or fails if there is not any. *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* Adds a processing instruction to the document container.
+ * The parser does this for PIs occurring outside the DTD and outside
+ * the root element.
+ *)
+
+ method pinstr : string -> proc_instruction list
+ (* Returns all PIs for the passed target string. *)
+
+ method pinstr_names : string list
+ (* Returns the target strings of all PIs. *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* Writes the document to the passed output stream, using the passed
+ * encoding for the output. The format is compact (the opposite of
+ * "pretty printing").
+ * If a DTD is present, the DTD is included into the internal subset.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser. Use 'write' instead, which is the
+ * successor of this method.
+ *)
+
+ end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/30 15:47:37 gerd
+ * New method node_path.
+ * New function compare.
+ * New type ord_index with functions.
+ *
+ * Revision 1.9 2000/08/26 23:27:53 gerd
+ * New function: make_spec_from_alist.
+ * New iterators: find, find_all, find_element, find_all_elements,
+ * map_tree, map_tree_sibl, iter_tree, iter_tree_sibl.
+ * New node methods: node_position, nth_node, previous_node,
+ * next_node.
+ * Attribute and namespace types have now a string argument:
+ * the name/prefix. I hope this simplifies the handling of view nodes.
+ * First implementation of view nodes: attribute_impl. The
+ * method attributes_as_nodes returns the attributes wrapped into
+ * T_attribute nodes which reside outside the document tree.
+ *
+ * Revision 1.8 2000/08/18 20:14:00 gerd
+ * New node_types: T_super_root, T_pinstr, T_comment, (T_attribute),
+ * (T_none), (T_namespace).
+ *
+ * Revision 1.7 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.6 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5 2000/07/14 13:56:11 gerd
+ * Added methods id_attribute_name, id_attribute_value,
+ * idref_attribute_names.
+ *
+ * Revision 1.4 2000/07/09 17:51:14 gerd
+ * Element nodes can store positions.
+ *
+ * Revision 1.3 2000/07/04 22:05:10 gerd
+ * New functions make_spec_from_mapping, create_data_node,
+ * create_element_node.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_document.mli:
+ *
+ * Revision 1.13 2000/05/27 19:15:08 gerd
+ * Removed the method init_xml_standalone.
+ *
+ * Revision 1.12 2000/05/01 20:42:34 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.11 2000/04/30 18:15:57 gerd
+ * Beautifications.
+ * New method keep_always_whitespace_mode.
+ *
+ * Revision 1.10 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.9 2000/01/27 21:51:56 gerd
+ * Added method 'attributes'.
+ *
+ * Revision 1.8 2000/01/27 21:19:07 gerd
+ * Added further methods.
+ *
+ * Revision 1.7 1999/11/09 22:20:14 gerd
+ * Removed method init_dtd from class "document". The DTD is
+ * implicitly passed to the document by the root element.
+ *
+ * Revision 1.6 1999/09/01 22:51:40 gerd
+ * Added methods to store processing instructions.
+ *
+ * Revision 1.5 1999/09/01 16:19:57 gerd
+ * The "document" class has now a "warner" as class argument.
+ *
+ * Revision 1.4 1999/08/19 21:59:13 gerd
+ * Added method "reset_finder".
+ *
+ * Revision 1.3 1999/08/19 01:08:29 gerd
+ * Added method "find".
+ *
+ * Revision 1.2 1999/08/15 02:19:41 gerd
+ * Some new explanations: That unknown elements are not rejected
+ * if the DTD allows them.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_lexers
+open Pxp_entity
+open Pxp_aux
+open Pxp_dfa
+
+(**********************************************************************)
+
+(* The DTD object: a container for element, entity, notation and
+ * processing instruction declarations. All contained objects must use
+ * the same internal encoding as the DTD itself (checked by the add_*
+ * methods below).
+ *)
+class dtd the_warner init_encoding =
+ object (self)
+ (* Name of the root element type (written after <!DOCTYPE by 'write')
+ * and the DTD identifier; both can be set only once.
+ *)
+ val mutable root = (None : string option)
+ val mutable id = (None : dtd_id option)
+
+ (* Receiver of warnings, internal encoding, and the lexers for it *)
+ val warner = (the_warner : collect_warnings)
+ val encoding = init_encoding
+ val lexerset = Pxp_lexers.get_lexer_set init_encoding
+
+ (* Declaration tables, keyed by name (PIs: by target). For general
+ * entities the boolean is the 'extdecl' flag passed to add_gen_entity.
+ * The pinstr table may hold several bindings for the same target.
+ *)
+ val elements = (Hashtbl.create 100 : (string,dtd_element) Hashtbl.t)
+ val gen_entities = (Hashtbl.create 100 : (string,entity * bool) Hashtbl.t)
+ val par_entities = (Hashtbl.create 100 : (string,entity) Hashtbl.t)
+ val notations = (Hashtbl.create 100 : (string,dtd_notation) Hashtbl.t)
+ val pinstr = (Hashtbl.create 100 : (string,proc_instruction) Hashtbl.t)
+ (* Names of the entries of the tables above; new names are prepended,
+ * so the lists are in reverse order of addition.
+ *)
+ val mutable element_names = []
+ val mutable gen_entity_names = []
+ val mutable par_entity_names = []
+ val mutable notation_names = []
+ val mutable pinstr_names = []
+
+ (* allow_arbitrary: if true, lookups of undeclared elements and
+ * notations raise Undeclared instead of Validation_error, and
+ * 'validate' does nothing.
+ *)
+ val mutable allow_arbitrary = false
+ val mutable standalone_declaration = false
+
+ (* Whether 'validate' has succeeded since the last modification *)
+ val mutable validated = false
+
+ initializer
+   (* Pre-declares the five predefined general entities of XML 1.0
+    * (section 4.6, Predefined Entities) as internal, non-external
+    * entities. Warnings arising from these declarations are dropped.
+    *
+    * The literal values are exactly the ones given by the XML
+    * specification. In particular, lt and amp are doubly escaped, so
+    * that their replacement texts are the character references
+    * themselves. This agrees with the redeclaration check in
+    * add_gen_entity, which expects the replacement text of lt to
+    * tokenize as [CRef 60] and that of amp as [CRef 38].
+    * NOTE(review): this assumes that internal_entity expands character
+    * references in the literal value; confirm in Pxp_entity.
+    *)
+   let w = new drop_warnings in
+   List.iter
+     (fun (ename, literal) ->
+        self # add_gen_entity
+          (new internal_entity self ename w literal false false false encoding)
+          false)
+     [ "lt",   "&#38;#60;";
+       "gt",   "&#62;";
+       "amp",  "&#38;#38;";
+       "apos", "&#39;";
+       "quot", "&#34;" ]
+
+
+ method encoding = encoding
+   (* The internal encoding of all strings stored in this DTD *)
+
+ method warner = warner
+   (* The object receiving the warnings *)
+
+ method root = root
+   (* The name of the root element type, if already set *)
+
+ method id = id
+   (* The DTD identifier, if already set *)
+
+ method set_root r =
+   (* Sets the name of the root element type. This is possible only once;
+    * a second call is a programming error (assertion failure).
+    *)
+   match root with
+     None   -> root <- Some r
+   | Some _ -> assert false
+
+ method set_id j =
+   (* Sets the DTD identifier. This is possible only once, too. *)
+   match id with
+     None   -> id <- Some j
+   | Some _ -> assert false
+
+ method standalone_declaration = standalone_declaration
+   (* The current value of the standalone flag *)
+
+ method set_standalone_declaration b =
+   standalone_declaration <- b
+
+ method arbitrary_allowed = allow_arbitrary
+   (* Whether undeclared elements and notations are tolerated *)
+
+ method allow_arbitrary =
+   allow_arbitrary <- true
+
+ method disallow_arbitrary =
+   allow_arbitrary <- false
+
+
+ method add_element el =
+   (* Registers the element declaration 'el' under its name and marks the
+    * DTD as not validated.
+    * Raises Not_found if an element of the same name has already been
+    * added. The name is checked with check_name first.
+    * Note: 'el' must be encoded in the same way as 'self'!
+    *)
+   let el_name = el # name in
+   check_name warner el_name;
+   if Hashtbl.mem elements el_name then
+     raise Not_found
+   else begin
+     Hashtbl.add elements el_name el;
+     element_names <- el_name :: element_names;
+     validated <- false
+   end
+
+
+ method add_gen_entity en extdecl =
+   (* Adds the general entity 'en'; 'extdecl' is stored together with it.
+    * Fails if the encoding of 'en' differs from the encoding of the DTD.
+    *
+    * If an entity of the same name already exists, the first declaration
+    * wins and the new one is not stored:
+    * - The predefined entities lt, gt, amp, quot and apos may be declared
+    *   several times, but only with a replacement text that denotes the
+    *   proper character; otherwise Validation_error is raised.
+    * - For all other names only a warning is emitted.
+    *)
+   if en # encoding <> encoding then
+     failwith "Pxp_dtd.dtd # add_gen_entity: Inconsistent encodings";
+   let ent_name = en # name in
+   check_name warner ent_name;
+   if not (Hashtbl.mem gen_entities ent_name) then begin
+     Hashtbl.add gen_entities ent_name (en, extdecl);
+     gen_entity_names <- ent_name :: gen_entity_names
+   end
+   else if List.mem ent_name [ "lt"; "gt"; "amp"; "quot"; "apos" ] then begin
+     let (rt,_) = en # replacement_text in
+     let toks = tokens_of_content_string lexerset rt in
+     (* The predefined entity denoted by the replacement text, if any: *)
+     let denoted =
+       match toks with
+         [CRef 60]                      -> Some "lt"
+       | [CharData ">"]  | [CRef 62]    -> Some "gt"
+       | [CRef 38]                      -> Some "amp"
+       | [CharData "'"]  | [CRef 39]    -> Some "apos"
+       | [CharData "\""] | [CRef 34]    -> Some "quot"
+       | _                              -> None
+     in
+     if denoted <> Some ent_name then
+       raise (Validation_error("Predefined entity `" ^ ent_name ^
+                               "' redeclared"))
+   end
+   else
+     warner # warn ("Entity `" ^ ent_name ^ "' declared twice")
+
+
+ method add_par_entity en =
+   (* Adds the parameter entity 'en'. Fails if the encoding of 'en'
+    * differs from the encoding of the DTD. If a parameter entity of the
+    * same name already exists, the old one is kept and a warning is
+    * emitted.
+    *)
+   if en # encoding <> encoding then
+     failwith "Pxp_dtd.dtd # add_par_entity: Inconsistent encodings";
+   let ent_name = en # name in
+   check_name warner ent_name;
+   if Hashtbl.mem par_entities ent_name then
+     warner # warn ("Entity `" ^ ent_name ^ "' declared twice")
+   else begin
+     Hashtbl.add par_entities ent_name en;
+     par_entity_names <- ent_name :: par_entity_names
+   end
+
+
+ method add_notation no =
+   (* Adds the notation 'no'. Fails if the encoding of 'no' differs from
+    * the encoding of the DTD. Raises Validation_error if a notation of
+    * the same name has already been added.
+    *)
+   if no # encoding <> encoding then
+     failwith "Pxp_dtd.dtd # add_notation: Inconsistent encodings";
+   let no_name = no # name in
+   check_name warner no_name;
+   if Hashtbl.mem notations no_name then
+     raise (Validation_error("Notation `" ^ no_name ^ "' declared twice"))
+   else begin
+     Hashtbl.add notations no_name no;
+     notation_names <- no_name :: notation_names
+   end
+
+
+ method add_pinstr pi =
+ (* Adds the processing instruction 'pi' to the DTD. Fails if the
+ * encoding of 'pi' differs from the encoding of the DTD.
+ *
+ * PIs whose target begins with "pxp:" are interpreted as options:
+ * - <?pxp:dtd optional-element-and-notation-declarations?>
+ * switches this DTD into the mode allowing arbitrary elements and
+ * notations
+ * - <?pxp:dtd optional-attribute-declarations elements='...'?>
+ * allows arbitrary attributes for the listed elements, which must
+ * already have been added to this DTD
+ * Any other "pxp:" target or option raises Error.
+ *
+ * In every non-error case the PI is stored under its target; several
+ * PIs with the same target may be stored.
+ *)
+ if pi # encoding <> encoding then
+ failwith "Pxp_dtd.dtd # add_pinstr: Inconsistent encodings";
+ let name = pi # target in
+ check_name warner name;
+
+ if String.length name >= 4 && String.sub name 0 4 = "pxp:" then begin
+ match name with
+ "pxp:dtd" ->
+ let _, optname, atts = pi # parse_pxp_option in
+ begin match optname with
+ "optional-element-and-notation-declarations" ->
+ self # allow_arbitrary
+ | "optional-attribute-declarations" ->
+ let lexers = Pxp_lexers.get_lexer_set encoding in
+ let el_string =
+ try List.assoc "elements" atts
+ with Not_found ->
+ raise(Error("Missing `elements' attribute for pxp:dtd"))
+ in
+ (* The attribute value is a list of element names *)
+ let el = split_attribute_value lexers el_string in
+ List.iter
+ (fun e_name ->
+ let e =
+ try Hashtbl.find elements e_name
+ with
+ Not_found ->
+ raise(Error("Reference to unknown element `" ^
+ e_name ^ "'"))
+ in
+ e # allow_arbitrary
+ )
+ el
+ | _ ->
+ raise(Error("Unknown PXP option `" ^
+ optname ^ "'"))
+ end
+ | _ ->
+ raise(Error("The processing instruction target `" ^
+ name ^ "' is not defined by this PXP version"))
+ end
+ else begin
+ (*----------------------------------------------------------------------
+ * SUPPORT FOR DEPRECATED PI OPTIONS:
+ * - <?xml:allow_undeclared_elements_and_notations?>
+ * is now <?pxp:dtd optional-element-and-notation-declarations?>
+ * - <?xml:allow_undeclared_attributes <elementname>?>
+ * is now <?pxp:dtd optional-attribute-declarations
+ * elements='<elementname> ...'?>
+ * Please update your DTDs! Alternatively, you may uncomment the
+ * following piece of code.
+ *)
+(* if name = "xml:allow_undeclared_elements_and_notations" then *)
+(* self # allow_arbitrary; *)
+(* if name = "xml:allow_undeclared_attributes" then begin *)
+(* let v = pi # value in *)
+(* let e = *)
+(* try *)
+(* Hashtbl.find elements v *)
+(* with *)
+(* Not_found -> *)
+(* raise(Validation_error("Reference to undeclared element `"*)
+(* ^ v ^ "'")) *)
+(* in *)
+(* e # allow_arbitrary; *)
+(* end; *)
+ (*----------------------------------------------------------------------
+ *)
+ ()
+ end;
+ (* Hashtbl.add keeps earlier bindings for the same target, and the
+ * target is recorded once per added PI.
+ *)
+ Hashtbl.add pinstr name pi;
+ pinstr_names <- name :: pinstr_names;
+
+
+ method element name =
+   (* Returns the declaration of the element type 'name'. If there is
+    * none, Undeclared is raised when arbitrary elements are allowed, and
+    * Validation_error otherwise.
+    *)
+   try
+     Hashtbl.find elements name
+   with
+     Not_found ->
+       if not allow_arbitrary then
+         raise(Validation_error("Reference to undeclared element `" ^ name ^ "'"));
+       raise Undeclared
+
+ method element_names =
+   (* The names of all element declarations, latest addition first *)
+   element_names
+
+
+ method gen_entity name =
+   (* Returns the general entity 'name' together with its 'extdecl' flag,
+    * or raises WF_error if there is no such entity.
+    *)
+   try
+     Hashtbl.find gen_entities name
+   with
+     Not_found ->
+       let msg = "Reference to undeclared general entity `" ^ name ^ "'" in
+       raise(WF_error msg)
+
+
+ method gen_entity_names =
+   (* The names of all general entities, latest addition first *)
+   gen_entity_names
+
+
+ method par_entity name =
+   (* Returns the parameter entity 'name', or raises WF_error if there
+    * is no such entity.
+    *)
+   try
+     Hashtbl.find par_entities name
+   with
+     Not_found ->
+       let msg = "Reference to undeclared parameter entity `" ^ name ^ "'" in
+       raise(WF_error msg)
+
+
+ method par_entity_names =
+   (* The names of all parameter entities, latest addition first *)
+   par_entity_names
+
+
+ method notation name =
+   (* Returns the notation 'name'. If there is none, Undeclared is raised
+    * when arbitrary notations are allowed, and Validation_error
+    * otherwise.
+    *)
+   try
+     Hashtbl.find notations name
+   with
+     Not_found ->
+       if not allow_arbitrary then
+         raise(Validation_error("Reference to undeclared notation `" ^ name ^ "'"));
+       raise Undeclared
+
+
+ method notation_names =
+   (* The names of all notations, latest addition first *)
+   notation_names
+
+
+ method pinstr name =
+   (* Returns all processing instructions of the DTD that have the
+    * target 'name' (the empty list if there are none)
+    *)
+   Hashtbl.find_all pinstr name
+
+
+ method pinstr_names =
+   (* The targets of all added processing instructions, one entry per
+    * added instruction, latest addition first
+    *)
+   pinstr_names
+
+ method write os enc doctype =
+   (* Writes the declarations of this DTD to the output stream 'os',
+    * converting from the internal encoding to 'enc'. If 'doctype' is
+    * true, the declarations are wrapped into
+    * <!DOCTYPE root [ ... ]>, which fails if no root name is set.
+    *
+    * Written are, in this order and sorted by name within each group:
+    * notations, unparsed (NDATA) entities, elements, and processing
+    * instructions. Other entities are not written.
+    *
+    * Several processing instructions may share one target. Every stored
+    * instruction is written exactly once, and instructions with the same
+    * target are written in the order of their addition.
+    *)
+   let wms =
+     write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+   (* Writes a quoted literal; single quotes are used if the literal
+    * contains a double quote
+    *)
+   let write_sysid s =
+     if String.contains s '"' then
+       wms ("'" ^ s ^ "'")
+     else
+       wms ("\"" ^ s ^ "\"")
+   in
+
+   (* Removes adjacent duplicates from a sorted list *)
+   let rec uniq l =
+     match l with
+       a :: (b :: _ as rest) ->
+         if a = b then uniq rest else a :: uniq rest
+     | _ -> l
+   in
+
+   if doctype then begin
+     wms "<!DOCTYPE ";
+     ( match root with
+         None -> failwith "#write: DTD without root";
+       | Some r -> wms r
+     );
+     wms " [\n";
+   end;
+
+   (* Notations: *)
+   List.iter
+     (fun name ->
+        let notation =
+          try Hashtbl.find notations name with Not_found -> assert false in
+        notation # write os enc)
+     (List.sort compare notation_names);
+
+   (* Unparsed entities: *)
+   List.iter
+     (fun name ->
+        let ent,_ =
+          try Hashtbl.find gen_entities name with Not_found -> assert false
+        in
+        if ent # is_ndata then begin
+          let xid = ent # ext_id in
+          let notation = ent # notation in
+          wms ("<!ENTITY " ^ name ^ " " );
+          ( match xid with
+              System s ->
+                wms "SYSTEM ";
+                write_sysid s;
+            | Public (p,s) ->
+                wms "PUBLIC ";
+                write_sysid p;
+                if (s <> "") then begin
+                  wms " ";
+                  write_sysid s;
+                end;
+            | Anonymous ->
+                failwith "#write: External ID Anonymous cannot be represented"
+          );
+          wms (" NDATA " ^ notation ^ ">\n");
+        end
+     )
+     (List.sort compare gen_entity_names);
+
+   (* Elements: *)
+   List.iter
+     (fun name ->
+        let element =
+          try Hashtbl.find elements name with Not_found -> assert false in
+        element # write os enc)
+     (List.sort compare element_names);
+
+   (* Processing instructions: pinstr_names has one entry per added PI,
+    * so the targets are made unique first. Hashtbl.find_all returns the
+    * most recently added binding first; the list is reversed to get the
+    * order of addition.
+    *)
+   List.iter
+     (fun name ->
+        List.iter
+          (fun pi -> pi # write os enc)
+          (List.rev (Hashtbl.find_all pinstr name)))
+     (uniq (List.sort compare pinstr_names));
+
+   if doctype then
+     wms "]>\n"
+
+ method write_compact_as_latin1 os doctype =
+   (* Compatibility wrapper: 'write' with ISO-8859-1 as output encoding *)
+   self # write os `Enc_iso88591 doctype
+
+
+
+ (************************************************************)
+ (* VALIDATION *)
+ (************************************************************)
+
+ method only_deterministic_models =
+   (* Checks all element declarations: an element whose content model is
+    * a regular expression must have a content DFA. If content_dfa is
+    * None for such an element, Validation_error is raised because the
+    * model is not deterministic.
+    *)
+   Hashtbl.iter
+     (fun el_name el ->
+        let is_regexp =
+          match el # content_model with
+            Regexp _ -> true
+          | _        -> false
+        in
+        if is_regexp && el # content_dfa = None then
+          raise(Validation_error("The content model of element `" ^
+                                 el_name ^ "' is not deterministic")))
+     elements
+
+
+ method validate =
+   (* Validates the DTD unless this has already been done since the last
+    * modification, or unless arbitrary elements and notations are
+    * allowed (in this case nothing is checked at all).
+    * Raises Validation_error if a constraint is violated; on success
+    * the result is remembered until the DTD is invalidated.
+    *)
+   if not (validated || allow_arbitrary) then begin
+     (* Validity constraint: Notations in NDATA entity declarations must
+      * be declared
+      *)
+     List.iter
+       (fun name ->
+          let ent,_ =
+            try Hashtbl.find gen_entities name with Not_found -> assert false
+          in
+          if ent # is_ndata then begin
+            let notation = ent # notation in
+            try
+              ignore(self # notation notation)
+              (* Raises Validation_error if the constraint is violated *)
+            with
+              Undeclared -> ()
+          end
+       )
+       gen_entity_names;
+
+     (* Validate the elements: *)
+     Hashtbl.iter
+       (fun _ el -> el # validate)
+       elements;
+
+     (* Check the root element: if a root name is set, it must be declared *)
+     (* TODO: Check if this piece of code is executed at all! *)
+     begin match root with
+       None -> ()
+     | Some r ->
+         if not (Hashtbl.mem elements r) then
+           raise(Validation_error("The root element is not declared"))
+     end;
+     validated <- true
+   end
+
+ method invalidate =
+   (* Forces the next call of 'validate' to check the DTD again *)
+   validated <- false
+
+ (************************************************************)
+
+ end
+
+
+(**********************************************************************)
+
+(* The declaration of one element type: its content model and its
+ * attribute declarations. The object belongs to the DTD 'the_dtd' and
+ * uses the encoding of that DTD.
+ *)
+and dtd_element the_dtd the_name =
+ object (self)
+ val dtd = (the_dtd : dtd)
+ val name = the_name
+ val lexerset = Pxp_lexers.get_lexer_set (the_dtd # encoding)
+ (* The content model; Unspecified until set_cm_and_extdecl is called *)
+ val mutable content_model = Unspecified
+ val mutable content_model_validated = false
+ (* The DFA for the content model, computed on first access (see the
+ * methods content_dfa and compute_content_dfa)
+ *)
+ val mutable content_dfa = lazy None
+
+ (* The 'extdecl' flag passed to set_cm_and_extdecl *)
+ val mutable externally_declared = false
+
+ (* The declared attributes as association list:
+ * name -> ((type, default), extdecl flag).
+ * New attributes are prepended.
+ *)
+ val mutable attributes =
+ ([] : (string * ((att_type * att_default) * bool)) list)
+ val mutable attributes_validated = false
+
+ (* Name of the most recently added attribute of type ID (if any), and
+ * the names of all attributes of type IDREF or IDREFS
+ *)
+ val mutable id_att_name = None
+ val mutable idref_att_names = []
+
+ (* If true, lookups of undeclared attributes raise Undeclared instead
+ * of Validation_error
+ *)
+ val mutable allow_arbitrary = false
+
+ method name = name
+   (* The name of the element type *)
+
+ method encoding = dtd # encoding
+   (* The encoding is always the encoding of the DTD *)
+
+ method externally_declared = externally_declared
+   (* The 'extdecl' flag of the content model declaration *)
+
+ method content_model = content_model
+   (* The content model, or Unspecified if not yet set *)
+
+ method set_cm_and_extdecl m extdecl =
+   (* Sets the content model and the 'extdecl' flag, and invalidates the
+    * DTD. This is possible only once: if the element already has a
+    * content model, Validation_error is raised.
+    *)
+   if content_model <> Unspecified then
+     raise(Validation_error("Element `" ^ name ^ "' has already a content model"));
+   content_model <- m;
+   content_model_validated <- false;
+   (* The DFA is computed not before it is demanded for the first time *)
+   content_dfa <- lazy (self # compute_content_dfa);
+   externally_declared <- extdecl;
+   dtd # invalidate
+
+ method content_dfa = Lazy.force content_dfa
+   (* The DFA of the content model, if there is one *)
+
+ method private compute_content_dfa =
+   (* Only regular expression models have a DFA. dfa_of_regexp_content_model
+    * signals by Not_found that no DFA can be constructed.
+    *)
+   match content_model with
+     Regexp model ->
+       begin try
+         Some (dfa_of_regexp_content_model model)
+       with
+         Not_found -> None
+       end
+   | _ ->
+       None
+
+ method arbitrary_allowed = allow_arbitrary
+   (* Whether undeclared attributes are tolerated *)
+
+ method allow_arbitrary =
+   allow_arbitrary <- true
+
+ method disallow_arbitrary =
+   allow_arbitrary <- false
+
+ method add_attribute aname t d extdecl =
+   (* Declares the attribute 'aname' with type 't', default 'd', and the
+    * flag 'extdecl'. If the attribute is already declared, the first
+    * declaration is kept, and only a warning is emitted.
+    * The attribute xml:space must be declared as an enumeration with
+    * exactly the values default and preserve; otherwise Validation_error
+    * is raised.
+    * A successful declaration invalidates the DTD.
+    *)
+   (* The names xml:lang and xml:space are exempted from the name check *)
+   if aname <> "xml:lang" && aname <> "xml:space" then
+     check_name (dtd#warner) aname;
+   if List.mem_assoc aname attributes then
+     dtd # warner # warn ("More than one declaration for attribute `" ^
+                          aname ^ "' of element type `" ^ name ^ "'")
+   else begin
+     if aname = "xml:space" then begin
+       let bad_declaration() =
+         raise(Validation_error("Declaration of attribute `xml:space' does not conform to XML specification"))
+       in
+       match t with
+         A_enum l ->
+           if List.sort compare l <> [ "default"; "preserve" ] then
+             bad_declaration()
+       | _ ->
+           bad_declaration()
+     end;
+     (* Remember ID and IDREF(S) attributes for fast access: *)
+     ( match t with
+         A_id ->
+           id_att_name <- Some aname
+       | A_idref | A_idrefs ->
+           idref_att_names <- aname :: idref_att_names
+       | _ ->
+           ()
+     );
+     attributes <- (aname, ((t,d),extdecl)) :: attributes;
+     attributes_validated <- false;
+     dtd # invalidate
+   end
+
+ method attribute attname =
+   (* Returns the pair (type, default) of the attribute 'attname'. If the
+    * attribute is not declared, Undeclared is raised when arbitrary
+    * attributes are allowed, and Validation_error otherwise.
+    *)
+   try
+     let (type_and_default, _) = List.assoc attname attributes in
+     type_and_default
+   with
+     Not_found ->
+       if not allow_arbitrary then
+         raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                ^ name ^ "' not declared"));
+       raise Undeclared
+
+ method attribute_violates_standalone_declaration attname v =
+   (* Returns whether the attribute 'attname' with the value 'v' (None
+    * means: the attribute is missing) depends on its declaration in a
+    * way a standalone declaration forbids. This can only be the case
+    * if the 'extdecl' flag of the attribute is set, and
+    * - the attribute is missing but has a default or fixed value, or
+    * - the attribute is not CDATA and normalization changes its value.
+    * For undeclared attributes the same exceptions are raised as by
+    * the method 'attribute'.
+    *)
+   try
+     let (atype, adefault), extdecl = List.assoc attname attributes in
+     if not extdecl then
+       false
+     else begin
+       match v with
+         None ->
+           ( match adefault with
+               D_required | D_implied -> false
+             | _                      -> true   (* D_default or D_fixed *)
+           )
+       | Some s ->
+           ( match atype with
+               A_cdata -> false
+             | _       -> normalization_changes_value lexerset atype s
+           )
+     end
+   with
+     Not_found ->
+       if not allow_arbitrary then
+         raise(Validation_error("Attribute `" ^ attname ^ "' of element `"
+                                ^ name ^ "' not declared"));
+       raise Undeclared
+
+
+ method attribute_names =
+   (* The names of all declared attributes, latest declaration first *)
+   List.map (fun (aname, _) -> aname) attributes
+
+ method names_of_required_attributes =
+   (* The names of the attributes declared as #REQUIRED, in the same
+    * order as in attribute_names
+    *)
+   List.fold_right
+     (fun (aname, ((_, adefault), _)) required ->
+        if adefault = D_required then aname :: required else required)
+     attributes
+     []
+
+ method id_attribute_name = id_att_name
+   (* The name of the attribute of type ID, if any *)
+
+ method idref_attribute_names = idref_att_names
+   (* The names of the attributes of type IDREF or IDREFS *)
+
+
+ method write os enc =
+   (* Writes the <!ELEMENT ...> declaration of this element type and one
+    * <!ATTLIST ...> declaration containing all attributes to the output
+    * stream 'os', converting from the internal encoding to 'enc'.
+    * Fails if the content model is still Unspecified, or if a content
+    * model or an enumeration is empty.
+    *
+    * Fixed default values are written with the keyword #FIXED, as
+    * required by the production DefaultDecl of XML 1.0.
+    *)
+   let encoding = self # encoding in
+   let wms =
+     write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+   (* Postfix operators are applied to parenthesized subexpressions if
+    * the subexpression is itself a postfix expression
+    *)
+   let needs_parens re =
+     match re with
+       (Optional _ | Repeated _ | Repeated1 _ ) -> true
+     | _ -> false
+   in
+
+   (* Writes the names separated by bars; used for the value lists of
+    * NOTATION and enumerated attribute types
+    *)
+   let write_alternatives names =
+     match names with
+       first :: rest ->
+         wms first;
+         List.iter (fun n -> wms ("|" ^ n)) rest
+     | [] ->
+         failwith "#write: Illegal content model"
+   in
+
+   (* Writes a regular expression; cp says whether the expression occurs
+    * within parentheses (a single child at top level gets its own pair)
+    *)
+   let rec write_children re cp =
+     match re with
+       Optional re'  -> write_postfix re' cp "?"
+     | Repeated re'  -> write_postfix re' cp "*"
+     | Repeated1 re' -> write_postfix re' cp "+"
+     | Alt res       -> write_group "|" res
+     | Seq res       -> write_group "," res
+     | Child ch ->
+         if not cp then wms "(";
+         wms ch;
+         if not cp then wms ")"
+
+   and write_postfix re' cp operator =
+     let p = needs_parens re' in
+     if p then wms "(";
+     write_children re' cp;
+     if p then wms ")";
+     wms operator
+
+   and write_group separator res =
+     wms "(";
+     ( match res with
+         first :: rest ->
+           write_children first true;
+           List.iter
+             (fun re' ->
+                wms separator;
+                write_children re' true)
+             rest
+       | [] ->
+           failwith "#write: Illegal content model"
+     );
+     wms ")"
+   in
+
+   let write_mixedspec m =
+     match m with
+       MPCDATA  -> wms "#PCDATA"
+     | MChild s -> wms s
+   in
+
+   let write_contentspec cs =
+     match cs with
+       Unspecified ->
+         failwith "#write: Unspecified content model found"
+     | Empty ->
+         wms "EMPTY"
+     | Any ->
+         wms "ANY"
+     | Mixed ml ->
+         wms "(";
+         ( match ml with
+             first :: rest ->
+               write_mixedspec first;
+               List.iter
+                 (fun m ->
+                    wms "|";
+                    write_mixedspec m)
+                 rest
+           | [] ->
+               ()
+         );
+         wms ")*"
+     | Regexp re ->
+         write_children re false
+   in
+
+   let write_att_type t =
+     match t with
+       A_cdata    -> wms " CDATA"
+     | A_id       -> wms " ID"
+     | A_idref    -> wms " IDREF"
+     | A_idrefs   -> wms " IDREFS"
+     | A_entity   -> wms " ENTITY"
+     | A_entities -> wms " ENTITIES"
+     | A_nmtoken  -> wms " NMTOKEN"
+     | A_nmtokens -> wms " NMTOKENS"
+     | A_notation nl ->
+         wms " NOTATION (";
+         write_alternatives nl;
+         wms ")"
+     | A_enum el ->
+         wms " (";
+         write_alternatives el;
+         wms ")"
+   in
+
+   let write_att_default d =
+     match d with
+       D_required -> wms " #REQUIRED"
+     | D_implied  -> wms " #IMPLIED"
+     | D_default s ->
+         wms " \"";
+         write_data_string ~from_enc:encoding ~to_enc:enc os s;
+         wms "\""
+     | D_fixed s ->
+         wms " #FIXED \"";
+         write_data_string ~from_enc:encoding ~to_enc:enc os s;
+         wms "\""
+   in
+
+   wms ("<!ELEMENT " ^ name ^ " ");
+   write_contentspec content_model;
+   wms ">\n";
+
+   wms ("<!ATTLIST " ^ name);
+   List.iter
+     (fun (n,((t,d),_)) ->
+        wms ("\n " ^ n);
+        write_att_type t;
+        write_att_default d)
+     attributes;
+
+   wms ">\n"
+
+ method write_compact_as_latin1 os =
+   (* Compatibility wrapper: 'write' with ISO-8859-1 as output encoding *)
+   self # write os `Enc_iso88591
+
+ (************************************************************)
+ (* VALIDATION *)
+ (************************************************************)
+
+ method validate =
+ (* Validates the attribute declarations and the content model of this
+ * element declaration. Raises Validation_error on violations.
+ *)
+ self # validate_attributes();
+ self # validate_content_model()
+
+ method private validate_attributes() =
+ (* Checks the validity constraints on the attribute declarations, and
+ * raises Validation_error for the first violation found. The result
+ * is remembered in attributes_validated, so the checks are not
+ * repeated until a new attribute is added.
+ * NOTE(review): 'count' is not defined in this class; presumably it
+ * comes from Pxp_aux and counts the list members satisfying the
+ * predicate.
+ *)
+ if attributes_validated then
+ ()
+ else begin
+ (* Validity Constraint: One ID per Element Type *)
+ let n = count (fun (n,((t,d),_)) -> t = A_id) attributes in
+ if n > 1 then
+ raise(Validation_error("More than one ID attribute for element `" ^ name ^ "'"));
+ (* Validity Constraint: ID Attribute Default *)
+ if List.exists
+ (fun (n,((t,d),_)) ->
+ t = A_id & (d <> D_required & d <> D_implied))
+ attributes
+ then
+ raise(Validation_error("ID attribute must be #IMPLIED or #REQUIRED; element `" ^ name ^ "'"));
+ (* Validity Constraint: One Notation per Element Type *)
+ let n = count (fun (n,((t,d),_)) ->
+ match t with A_notation _ -> true | _ -> false)
+ attributes in
+ if n > 1 then
+ raise(Validation_error("More than one NOTATION attribute for element `" ^ name ^ "'"));
+ (* Validity Constraint: Notation Attributes [second part] *)
+ (* The lookup in the DTD raises an exception for unknown notations *)
+ List.iter
+ (fun (n,((t,d),_)) ->
+ match t with
+ A_notation l ->
+ List.iter
+ (fun nname ->
+ let _ = dtd # notation nname in ())
+ l
+ | _ -> ())
+ attributes;
+ (* Validity Constraint: Attribute Default Legal *)
+ List.iter
+ (fun (n,((t,d),_)) ->
+
+ (* Checks the default value v of the attribute n with type t *)
+ let check v =
+ (* The error is wrapped into a lazy value, so it is raised only
+ * when the value is forced
+ *)
+ let lexical_error() =
+ lazy (raise(Validation_error("Default value for attribute `" ^ n ^ "' is lexically malformed"))) in
+ check_attribute_value_lexically lexerset (lexical_error()) t v;
+ begin match t with
+ (A_entity|A_entities) ->
+ (* Every name of the default value must denote an NDATA entity *)
+ List.iter
+ (fun nd ->
+ let en, extdecl = dtd # gen_entity nd in
+ if not (en # is_ndata) then
+ raise(Validation_error("Attribute default value must be the name of an NDATA entity; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+(* if dtd # standalone_declaration && extdecl then
+ raise(Validation_error("Attribute default value violates the standalone declaration; attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+-- This is checked anyway when the attribute value is normalized
+*)
+ )
+ (split_attribute_value lexerset v)
+ | A_notation nl ->
+ (* The default must be one of the enumerated notation names *)
+ if not (List.mem v nl) then
+ raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+ | A_enum nl ->
+ (* The default must be one of the enumerated tokens *)
+ if not (List.mem v nl) then
+ raise(Validation_error("Illegal default value for attribute `" ^ n ^ "' in declaration for element `" ^ name ^ "'"));
+ | _ -> ()
+ end
+ in
+
+ (* Only declarations with a default value need the check *)
+ match d with
+ D_required -> ()
+ | D_implied -> ()
+ | D_default v -> check v
+ | D_fixed v -> check v
+ )
+ attributes;
+
+ (* Ok: This element declaration is valid *)
+ attributes_validated <- true;
+
+ end
+
+ method private validate_content_model () =
+   (* Checks the content model:
+    * - If no content model has been set, the element type was only
+    *   mentioned but not declared; this results in a warning.
+    * - Validity Constraint: No Duplicate Types (for mixed content)
+    * It is not an error if the model refers to a child for which no
+    * element declaration is provided.
+    *)
+   match content_model with
+     Unspecified ->
+       dtd # warner # warn ("Element type `" ^ name ^ "' mentioned but not declared")
+   | Mixed (pcdata :: children) ->
+       (* MPCDATA is always the first element by construction *)
+       assert (pcdata = MPCDATA);
+       if check_dups children then
+         raise (Validation_error("Double children in declaration for element `" ^ name ^ "'"))
+   | Empty | Any | Regexp _ ->
+       ()
+   | _ ->
+       (* Mixed models are never empty *)
+       assert false
+
+
+
+ (************************************************************)
+
+ end
+
+(* The declaration of a notation: a name plus an external identifier *)
+and dtd_notation the_name the_xid init_encoding =
+object (self)
+  val name = the_name
+  val xid = (the_xid : ext_id)
+  val encoding = (init_encoding : Pxp_types.rep_encoding)
+
+  method name = name
+  method ext_id = xid
+  method encoding = encoding
+
+  method write os enc =
+    (* Writes the <!NOTATION ...> declaration to the output stream 'os',
+     * converting from the internal encoding to 'enc'. Fails if the
+     * external identifier is Anonymous.
+     *)
+    let wms =
+      write_markup_string ~from_enc:encoding ~to_enc:enc os in
+
+    (* Puts the literal into quotes; single quotes are used if the
+     * literal contains a double quote
+     *)
+    let quoted s =
+      if String.contains s '"' then "'" ^ s ^ "'" else "\"" ^ s ^ "\""
+    in
+
+    wms ("<!NOTATION " ^ name ^ " ");
+    ( match xid with
+        System sysid ->
+          wms "SYSTEM ";
+          wms (quoted sysid)
+      | Public (pubid, sysid) ->
+          wms "PUBLIC ";
+          wms (quoted pubid);
+          (* The system identifier is optional for notations *)
+          if sysid <> "" then begin
+            wms " ";
+            wms (quoted sysid)
+          end
+      | Anonymous ->
+          failwith "#write: External ID Anonymous cannot be represented"
+    );
+    wms ">\n"
+
+  method write_compact_as_latin1 os =
+    (* Compatibility wrapper: 'write' with ISO-8859-1 as output encoding *)
+    self # write os `Enc_iso88591
+
+  end
+
+(* A processing instruction: a target plus the unparsed value string *)
+and proc_instruction the_target the_value init_encoding =
+object (self)
+  val target = the_target
+  val value = (the_value : string)
+  val encoding = (init_encoding : Pxp_types.rep_encoding)
+
+  initializer
+    (* The target "xml" is reserved in every combination of upper and
+     * lower case. This is an error, not a warning, because there is no
+     * "warner" object at hand.
+     *)
+    let reserved =
+      [ "xml"; "xmL"; "xMl"; "xML"; "Xml"; "XmL"; "XMl"; "XML" ] in
+    if List.mem target reserved then
+      raise(WF_error("Reserved processing instruction"))
+
+  method target = target
+  method value = value
+  method encoding = encoding
+
+  method write os enc =
+    (* Writes the instruction as <?target value?> to the output stream
+     * 'os', converting from the internal encoding to 'enc'
+     *)
+    let wms =
+      write_markup_string ~from_enc:encoding ~to_enc:enc os in
+    List.iter wms [ "<?"; target; " "; value; "?>" ]
+
+  method write_compact_as_latin1 os =
+    (* Compatibility wrapper: 'write' with ISO-8859-1 as output encoding *)
+    self # write os `Enc_iso88591
+
+  method parse_pxp_option =
+    (* Interprets the value as a PXP option, i.e. as an option name
+     * followed by attributes, and returns the triple
+     * (target, option name, attributes). Raises Error if the value does
+     * not have this form.
+     *)
+    let lexers = get_lexer_set encoding in
+    let bad_pi() =
+      raise(Error("Bad PXP processing instruction")) in
+    try
+      begin match tokens_of_xml_pi lexers value with   (* may raise WF_error *)
+        (Pro_name option_name) :: rest ->
+          let atts = decode_xml_pi rest in             (* may raise WF_error *)
+          (target, option_name, atts)
+      | _ ->
+          bad_pi()
+      end
+    with
+      WF_error _ ->
+        bad_pi()
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.10 2000/08/18 21:18:45 gerd
+ * Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.9 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.8 2000/07/23 02:16:34 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.7 2000/07/16 17:50:01 gerd
+ * Fixes in 'write'
+ *
+ * Revision 1.6 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.5 2000/07/14 13:56:48 gerd
+ * Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.4 2000/07/09 00:13:37 gerd
+ * Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.3 2000/07/04 22:10:55 gerd
+ * Update: collect_warnings -> drop_warnings.
+ * Update: Case ext_id = Anonymous.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ *
+ * Revision 1.18 2000/05/28 17:24:55 gerd
+ * Bugfixes.
+ *
+ * Revision 1.17 2000/05/27 19:21:25 gerd
+ * Implemented the changes of rev. 1.10 of markup_dtd.mli.
+ *
+ * Revision 1.16 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.15 2000/05/14 21:50:07 gerd
+ * Updated: change in internal_entity.
+ *
+ * Revision 1.14 2000/05/06 23:08:46 gerd
+ * It is possible to allow undeclared attributes.
+ *
+ * Revision 1.13 2000/05/01 20:42:46 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.12 2000/05/01 15:16:57 gerd
+ * The errors "undeclared parameter/general entities" are
+ * well-formedness errors, not validation errors.
+ *
+ * Revision 1.11 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.10 2000/01/20 20:53:47 gerd
+ * Changed such that it runs with Markup_entity's new interface.
+ *
+ * Revision 1.9 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.8 1999/09/01 22:52:22 gerd
+ * If 'allow_arbitrary' is in effect, no validation happens anymore.
+ *
+ * Revision 1.7 1999/09/01 16:21:24 gerd
+ * Added several warnings.
+ * The attribute type of "xml:space" is now strictly checked.
+ *
+ * Revision 1.6 1999/08/15 20:34:21 gerd
+ * Improved error messages.
+ * Bugfix: It is no longer allowed to create processing instructions
+ * with target "xml".
+ *
+ * Revision 1.5 1999/08/15 02:20:16 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.4 1999/08/15 00:21:39 gerd
+ * Comments have been updated.
+ *
+ * Revision 1.3 1999/08/14 22:12:52 gerd
+ * Several functions have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Bugfix: if two general entities with the same name are defined,
+ * the first counts, not the second.
+ *
+ * Revision 1.2 1999/08/11 14:56:35 gerd
+ * Declaration of the predefined entities {lt,gt,amp,quot,apos}
+ * is no longer forbidden; but the original definition cannot be overridden.
+ * TODO: If these entities are redeclared with problematic values,
+ * the user should be warned.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(*$ markup-dtd1.mli *)
+
+(**********************************************************************)
+(* *)
+(* Pxp_dtd: *)
+(* Object model of document type declarations *)
+(* *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ * declarations, entity declarations, notation
+ * declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ * of a content model and an attribute list
+ * declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+ (* Creation:
+ * new dtd warner init_encoding
+ * creates a new, empty DTD object without any declaration, without a root
+ * element, without an ID.
+ * 'warner' is the object that receives warnings (see method 'warner');
+ * 'init_encoding' is the encoding used for character representation
+ * (see method 'encoding').
+ *)
+ Pxp_types.collect_warnings ->
+ Pxp_types.rep_encoding ->
+ object
+ method root : string option
+ (* get the name of the root element if present *)
+
+ method set_root : string -> unit
+ (* set the name of the root element. This method can be invoked
+ * only once
+ *)
+
+ method id : Pxp_types.dtd_id option
+ (* get the identifier for this DTD *)
+
+ method set_id : Pxp_types.dtd_id -> unit
+ (* set the identifier. This method can be invoked only once *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding used for character representation *)
+
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - elements and notations that have not been added may be used in an
+ * arbitrary way; the methods "element" and "notation" indicate this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+ (* NOTE(review): undocumented in the original; presumably reverts the
+ * effect of "allow_arbitrary" -- confirm against the implementation.
+ *)
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary contents are allowed or not. *)
+
+ method standalone_declaration : bool
+ (* Whether there is a 'standalone' declaration or not. Strictly
+ * speaking, this declaration is not part of the DTD, but it is
+ * included here because of practical reasons.
+ * If not set, this property defaults to 'false'.
+ *)
+
+ method set_standalone_declaration : bool -> unit
+ (* Sets the 'standalone' declaration. *)
+
+
+ method add_element : dtd_element -> unit
+ (* add the given element declaration to this DTD. Raises Not_found
+ * if there is already an element declaration with the same name.
+ *)
+
+ method add_gen_entity : Pxp_entity.entity -> bool -> unit
+ (* add_gen_entity e extdecl:
+ * add the entity 'e' as general entity to this DTD (general entities
+ * are those represented by &name;). If there is already a declaration
+ * with the same name, the second definition is ignored; as an exception
+ * to this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+ * may only be redeclared with a definition that is equivalent to the
+ * standard definition; otherwise a Validation_error is raised.
+ *
+ * 'extdecl': 'true' indicates that the entity declaration occurs in
+ * an external entity. (Used for the standalone check.)
+ *)
+
+ method add_par_entity : Pxp_entity.entity -> unit
+ (* add the given entity as parameter entity to this DTD (parameter
+ * entities are those represented by %name;). If there is already a
+ * declaration with the same name, the second definition is ignored.
+ *)
+
+ method add_notation : dtd_notation -> unit
+ (* add the given notation to this DTD. If there is already a declaration
+ * with the same name, a Validation_error is raised.
+ *)
+
+ method add_pinstr : proc_instruction -> unit
+ (* add the given processing instruction to this DTD. *)
+
+ method element : string -> dtd_element
+ (* looks up the element declaration with the given name. Raises
+ * Validation_error if the element cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method element_names : string list
+ (* returns the list of the names of all element declarations. *)
+
+ method gen_entity : string -> (Pxp_entity.entity * bool)
+ (* let e, extdecl = obj # gen_entity n:
+ * looks up the general entity 'e' with the name 'n'. Raises
+ * WF_error if the entity cannot be found.
+ * 'extdecl': indicates whether the entity declaration occurred in an
+ * external entity.
+ *)
+
+ method gen_entity_names : string list
+ (* returns the list of all general entity names *)
+
+ method par_entity : string -> Pxp_entity.entity
+ (* looks up the parameter entity with the given name. Raises
+ * WF_error if the entity cannot be found.
+ *)
+
+ method par_entity_names : string list
+ (* returns the list of all parameter entity names *)
+
+ method notation : string -> dtd_notation
+ (* looks up the notation declaration with the given name. Raises
+ * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+ * has been invoked before, Undeclared is raised instead.)
+ *)
+
+ method notation_names : string list
+ (* Returns the list of the names of all added notations *)
+
+ method pinstr : string -> proc_instruction list
+ (* looks up all processing instructions with the given target.
+ * The "target" is the identifier following "<?".
+ * Note: It is not possible to find out the exact position of the
+ * processing instruction.
+ *)
+
+ method pinstr_names : string list
+ (* Returns the list of the names (targets) of all added pinstrs *)
+
+ method validate : unit
+ (* ensures that the DTD is valid. This method is optimized such that
+ * actual validation is only performed if DTD has changed.
+ * If the DTD is invalid, mostly a Validation_error is raised,
+ * but other exceptions are possible, too.
+ *)
+
+ method only_deterministic_models : unit
+ (* Succeeds if all regexp content models are deterministic.
+ * Otherwise Validation_error.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+ (* write os enc doctype:
+ * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a
+ * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+ * only the declarations are written (the material within the
+ * square brackets).
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser. Its successor is 'write'.
+ *)
+
+
+ (*----------------------------------------*)
+ method invalidate : unit
+ (* INTERNAL METHOD *)
+ method warner : Pxp_types.collect_warnings
+ (* INTERNAL METHOD *)
+ end
+
+(*$-*)
+
+(*$ markup-dtd2.mli *)
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string ->
+ (* Creation:
+ * new dtd_element init_dtd init_name:
+ * creates a new dtd_element object for init_dtd with init_name.
+ * The strings are represented in the same encoding as init_dtd.
+ *)
+ object
+
+ method name : string
+ (* returns the name of the declared element *)
+
+ method externally_declared : bool
+ (* returns whether the element declaration occurs in an external
+ * entity.
+ *)
+
+ method content_model : Pxp_types.content_model_type
+ (* get the content model of this element declaration, or Unspecified *)
+
+ method content_dfa : Pxp_dfa.dfa_definition option
+ (* return the DFA of the content model if there is a DFA, or None.
+ * A DFA exists only for regexp style content models which are
+ * deterministic.
+ *)
+
+ method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+ (* set_cm_and_extdecl cm extdecl:
+ * set the content model to 'cm'. Once the content model is not
+ * Unspecified, it cannot be set to a different value again.
+ * Furthermore, it is set whether the element occurs in an external
+ * entity ('extdecl').
+ *)
+
+ method encoding : Pxp_types.rep_encoding
+ (* Return the encoding of the strings *)
+
+ method allow_arbitrary : unit
+ (* After this method has been invoked, the object changes its behaviour:
+ * - attributes that have not been added may be used in an
+ * arbitrary way; the method "attribute" indicates this
+ * by raising Undeclared instead of Validation_error.
+ *)
+
+ method disallow_arbitrary : unit
+ (* NOTE(review): undocumented in the original; presumably reverts the
+ * effect of "allow_arbitrary" -- confirm against the implementation.
+ *)
+
+ method arbitrary_allowed : bool
+ (* Returns whether arbitrary attributes are allowed or not. *)
+
+ method attribute : string ->
+ Pxp_types.att_type * Pxp_types.att_default
+ (* get the type and default value of a declared attribute, or raise
+ * Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_violates_standalone_declaration :
+ string -> string option -> bool
+ (* attribute_violates_standalone_declaration name v:
+ * Checks whether the attribute 'name' violates the "standalone"
+ * declaration if it has value 'v'.
+ * The method returns true if:
+ * - The attribute declaration occurs in an external entity,
+ * and if one of the two conditions holds:
+ * - v = None, and there is a default for the attribute value
+ * - v = Some s, and the type of the attribute is not CDATA,
+ * and s changes if normalized according to the rules of the
+ * attribute type.
+ *
+ * The method raises Validation_error if the attribute does not exist.
+ * If 'arbitrary_allowed', the exception Undeclared is raised instead
+ * of Validation_error.
+ *)
+
+ method attribute_names : string list
+ (* get the list of all declared attributes *)
+
+ method names_of_required_attributes : string list
+ (* get the list of all attributes that are specified as required
+ * attributes
+ *)
+
+ method id_attribute_name : string option
+ (* Returns the name of the attribute with type ID, or None. *)
+
+ method idref_attribute_names : string list
+ (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+ method add_attribute : string ->
+ Pxp_types.att_type ->
+ Pxp_types.att_default ->
+ bool ->
+ unit
+ (* add_attribute name type default extdecl:
+ * add an attribute declaration for an attribute with the given name,
+ * type, and default value. If there is more than one declaration for
+ * an attribute name, the first declaration counts; the other declarations
+ * are ignored.
+ * 'extdecl': if true, the attribute declaration occurs in an external
+ * entity. This property is used to check the "standalone" attribute.
+ *)
+
+ method validate : unit
+ (* checks whether this element declaration (i.e. the content model and
+ * all attribute declarations) is valid for the associated DTD.
+ * Raises mostly Validation_error if the validation fails.
+ *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser. Its successor is 'write'.
+ *)
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new dtd_notation a_name an_external_ID init_encoding
+ * creates a new dtd_notation object with the given name and the given
+ * external ID. 'init_encoding' is the encoding in which the strings
+ * are represented.
+ *)
+ object
+ method name : string
+ (* returns the name passed at creation time *)
+ method ext_id : Pxp_types.ext_id
+ (* returns the external ID passed at creation time *)
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding passed at creation time *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded
+ * string.
+ * Raises Failure if the external ID is Anonymous, because such an ID
+ * cannot be represented.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser. Same as 'write' with the output
+ * encoding `Enc_iso88591.
+ *)
+
+ end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+ (* Creation:
+ * new proc_instruction a_target a_value init_encoding
+ * creates a new proc_instruction object with the given target string and
+ * the given value string, both represented in 'init_encoding'.
+ * Note: A processing instruction is written as <?target value?>.
+ * Raises WF_error if the target is "xml" in any capitalization, because
+ * this target is reserved.
+ *)
+ object
+ method target : string
+ (* returns the target passed at creation time *)
+ method value : string
+ (* returns the value passed at creation time *)
+ method encoding : Pxp_types.rep_encoding
+ (* returns the encoding passed at creation time *)
+
+ method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+ (* write os enc:
+ * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+ *)
+
+ method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+ (* DEPRECATED METHOD; included only to keep compatibility with
+ * older versions of the parser. Same as 'write' with the output
+ * encoding `Enc_iso88591.
+ *)
+
+ method parse_pxp_option : (string * string * (string * string) list)
+ (* Parses a PI containing a PXP option. Such PIs are formed like:
+ * <?target option-name option-att="value" option-att="value" ... ?>
+ * The method returns a triple
+ * (target, option-name, [option-att, value; ...])
+ * or raises Error. Well-formedness errors found while parsing the
+ * value are reported as Error, too.
+ *)
+
+ end
+
+;;
+
+(*$-*)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/18 21:18:45 gerd
+ * Updated wrong comments for methods par_entity and gen_entity.
+ * These can raise WF_error and not Validation_error, and this is the
+ * correct behaviour.
+ *
+ * Revision 1.7 2000/07/25 00:30:01 gerd
+ * Added support for pxp:dtd PI options.
+ *
+ * Revision 1.6 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.5 2000/07/16 16:34:41 gerd
+ * New method 'write', the successor of 'write_compact_as_latin1'.
+ *
+ * Revision 1.4 2000/07/14 13:56:49 gerd
+ * Added methods id_attribute_name and idref_attribute_names.
+ *
+ * Revision 1.3 2000/07/09 00:13:37 gerd
+ * Added methods gen_entity_names, par_entity_names.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_dtd.ml:
+ *
+ * Revision 1.11 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.10 2000/05/27 19:20:38 gerd
+ * Changed the interfaces for the standalone check: New
+ * methods: standalone_declaration, set_standalone_declaration,
+ * externally_declared, attribute_violates_standalone_declaration.
+ * The method set_content_model has been renamed to
+ * set_cm_and_extdecl; it now initializes also whether the element
+ * has been declared in an external entity.
+ * Methods add_gen_entity and gen_entity pass an additional
+ * boolean argument containing whether the declaration of the
+ * general entity happened in an external entity.
+ * Method add_attribute expects this argument, too, which
+ * states whether the declaration of the attribute happened in an
+ * external entity.
+ *
+ * Revision 1.9 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.8 2000/05/06 23:10:26 gerd
+ * allow_arbitrary for elements, too.
+ *
+ * Revision 1.7 2000/05/01 20:42:52 gerd
+ * New method write_compact_as_latin1.
+ *
+ * Revision 1.6 2000/03/11 22:58:15 gerd
+ * Updated to support Markup_codewriter.
+ *
+ * Revision 1.5 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.4 1999/11/09 22:15:41 gerd
+ * Added method "arbitrary_allowed".
+ *
+ * Revision 1.3 1999/09/01 16:21:56 gerd
+ * "dtd" classes have now an argument that passes a "warner".
+ *
+ * Revision 1.2 1999/08/15 02:20:23 gerd
+ * New feature: a DTD can allow arbitrary elements.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(* TODO:
+ * - How can an internal entity be prevented from accepting an XML
+ * declaration in its replacement text?
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_aux
+open Pxp_reader
+
+(* Hierarchy of parsing layers:
+ *
+ * - Parser: Pxp_yacc
+ * + gets input stream from the main entity object
+ * + checks most of the grammar
+ * + creates the DTD object as side-effect
+ * + creates the element tree as side-effect
+ * + creates further entity objects that are entered into the DTD
+ * - Entity layer: Pxp_entity
+ * + gets input stream from the lexers, or another entity object
+ * + handles entity references: if a reference is encountered the
+ * input stream is redirected such that the tokens come from the
+ * referenced entity object
+ * + handles conditional sections
+ * - Lexer layer: Pxp_lexers
+ * + gets input from lexbuffers created by resolvers
+ * + different lexers for different lexical contexts
+ * + a lexer returns pairs (token,lexid), where token is the scanned
+ * token, and lexid is the name of the lexer that must be used for
+ * the next token
+ * - Resolver layer: Pxp_entity
+ * + a resolver creates the lexbuf from some character source
+ * + a resolver recodes the input and handles the encoding scheme
+ *)
+
+(**********************************************************************)
+
+(* Variables of type 'state' are used to insert Begin_entity and End_entity
+ * tokens into the stream.
+ * - At_beginning: Nothing has been read so far
+ * - Inserted_begin_entity: presumably a Begin_entity token has been
+ * inserted and the matching End_entity is still to come -- TODO confirm
+ * against the code that uses this type. (Begin_entity/End_entity must
+ * not be inserted if the entity is empty.)
+ * - At_end: Eof has been read, and End_entity has been returned.
+ *
+ * NOTE(review): an earlier version of this comment described the states
+ * First_token and In_stream; these are not constructors of the type
+ * below.
+ *)
+
+type state =
+ At_beginning
+ | Inserted_begin_entity
+ | At_end
+;;
+
+
+(**********************************************************************)
+
+class virtual entity the_dtd the_name the_warner
+ init_errors_with_line_numbers init_encoding =
+ object (self)
+ (* This class prescribes the type of all entity objects. Furthermore,
+ * the default 'next_token' mechanism is implemented.
+ *)
+
+ (* 'init_errors_with_line_numbers': whether error messages contain line
+ * numbers or not.
+ * Calculating line numbers is expensive.
+ *)
+
+ val mutable dtd = the_dtd
+ val mutable name = the_name
+ val mutable warner = the_warner
+
+ val encoding = (init_encoding : rep_encoding)
+ val lexerset = Pxp_lexers.get_lexer_set init_encoding
+
+ method encoding = encoding
+ (* method lexerset = lexerset *)
+
+ val mutable manager = None
+ (* The current entity_manager, see below *)
+
+ method private manager =
+   (* Returns the entity manager stored by 'set_manager'. Calling this
+    * method while no manager is stored is a programming error and trips
+    * the assertion.
+    *)
+   ( ( match manager with
+       | Some mgr -> mgr
+       | None -> assert false
+     )
+     : < current_entity : entity;
+         pop_entity : unit;
+         push_entity : entity -> unit >
+   )
+
+ method set_manager m = manager <- Some m
+
+
+ val mutable lexbuf = Lexing.from_string ""
+ (* The lexical buffer currently used as character source. *)
+
+ val mutable prolog = None
+ (* Stores the initial <?xml ...?> token as PI_xml *)
+
+ val mutable prolog_pairs = []
+ (* If prolog <> None, these are the (name,value) pairs of the
+ * processing instruction.
+ *)
+
+
+ val mutable lex_id = Document
+ (* The name of the lexer that should be used for the next token *)
+
+ method set_lex_id id = lex_id <- id
+ (* Sets the lexer that is used to scan the next token. The argument
+  * must actually be stored: assigning 'lex_id' to itself would turn
+  * this setter into a no-op.
+  *)
+
+
+
+ val mutable force_parameter_entity_parsing = false
+ (* 'true' forces that inner entities will always be embraced by
+ * Begin_entity and End_entity.
+ * 'false': the inner entity itself decides this
+ *)
+
+ val mutable check_text_declaration = true
+ (* 'true': It is checked that the <?xml..?> declaration matches the
+ * production TextDecl.
+ *)
+
+ val mutable normalize_newline = true
+ (* Whether this entity converts CRLF or CR to LF, or not *)
+
+
+ val mutable line = 1 (* current line *)
+ val mutable column = 0 (* current column *)
+ val mutable pos = 0 (* current absolute character position *)
+ val errors_with_line_numbers = init_errors_with_line_numbers
+
+ val mutable p_line = 1
+ val mutable p_column = 1
+
+ method line = p_line
+ method column = p_column
+
+
+ val mutable counts_as_external = false
+
+ method counts_as_external = counts_as_external
+ (* Whether the entity counts as external (for the standalone check). *)
+
+ method set_counts_as_external =
+ counts_as_external <- true
+
+
+ val mutable last_token = Bof
+ (* XXX
+ * These two variables are used to check that between certain pairs of
+ * tokens whitespaces exist. 'last_token' is simply the last token,
+ * but not Ignore, and not PERef (which both represent whitespace).
+ * 'space_seen' records whether Ignore or PERef was seen between this
+ * token and 'last_token'.
+ *)
+
+ val mutable deferred_token = None
+ (* If you set this to Some tl, the next invocations of
+ * next_token_from_entity will return the tokens in tl.
+ * This makes it possible to insert tokens into the stream.
+ *)
+
+ val mutable debug = false
+
+ method is_ndata = false
+ (* Returns if this entity is an NDATA (unparsed) entity *)
+
+ method name = name
+
+ method virtual open_entity : bool -> lexers -> unit
+ (* open_entity force_parsing lexid:
+ * opens the entity, and the first token is scanned by the lexer
+ * 'lexid'. 'force_parsing' forces that Begin_entity and End_entity
+ * tokens embrace the inner tokens of the entity; otherwise this
+ * depends on the entity.
+ * By opening an entity, reading tokens from it, and finally closing
+ * the entity, the inclusion methods "Included",
+ * "Included if validating", and "Included as PE" can be carried out.
+ * Which method is chosen depends on the 'lexid', i.e. the lexical
+ * context: 'lexid = Content' performs "Included (if validating)" (we
+ * are always validating); 'lexid = Declaration' performs
+ * "Included as PE". The difference is which tokens are recognized,
+ * and how spaces are handled.
+ * 'force_parsing' causes that a Begin_entity token is inserted before
+ * and an End_entity token is inserted after the entity. The yacc
+ * rules allow the Begin_entity ... End_entity brace only at certain
+ * positions; this is used to restrict the possible positions where
+ * entities may be included, and to guarantee that the entity matches
+ * a certain production of the grammar ("parsed entities").
+ * 'open_entity' is currently invoked with 'force_parsing = true'
+ * for toplevel nodes, for inclusion of internal general entities,
+ * and for inclusion of parameter entities into document entities.
+ * 'force_parsing = false' is used for all other cases: External
+ * entities add the Begin_entity/End_entity tokens anyway; internal
+ * entities do not. Especially internal parameter entities referenced
+ * from non-document entities do not add these tokens.
+ *)
+
+ method virtual close_entity : lexers
+ (* close_entity:
+ * closes the entity and returns the name of the lexer that must
+ * be used to scan the next token.
+ *)
+
+ method virtual replacement_text : (string * bool)
+ (* replacement_text:
+ * returns the replacement text of the entity, and as second value,
+ * whether the replacement text was constructed by referencing
+ * external entities (directly or indirectly).
+ * This method implements the inclusion method "Included in Literal".
+ *)
+
+
+ method lexbuf = lexbuf
+
+
+ method xml_declaration =
+   (* Returns Some of the (name,value) pairs of the initial
+    * <?xml name=value ...?> processing instruction, or None if no such
+    * instruction is stored in 'prolog'.
+    *)
+   match prolog with
+   | Some _ -> Some prolog_pairs
+   | None -> None
+
+
+ method set_debugging_mode m =
+ debug <- m
+
+ method private virtual set_encoding : string -> unit
+
+
+ method full_name =
+ name
+
+
+ method next_token =
+ (* read next token from this entity *)
+
+ match deferred_token with
+ Some toklist ->
+ ( match toklist with
+ [] ->
+ deferred_token <- None;
+ self # next_token
+ | tok :: toklist' ->
+ deferred_token <- Some toklist';
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (deferred)");
+ tok
+ )
+ | None -> begin
+ let this_line = line
+ and this_column = column in
+ let this_pos = pos in
+ p_line <- this_line;
+ p_column <- this_column;
+ (* Read the next token from the appropriate lexer lex_id, and get the
+ * name lex_id' of the next lexer to be used.
+ *)
+ let tok, lex_id' =
+ match lex_id with
+ Document -> lexerset.scan_document lexbuf
+ | Document_type -> lexerset.scan_document_type lexbuf
+ | Content -> lexerset.scan_content lexbuf
+ | Within_tag -> lexerset.scan_within_tag lexbuf
+ | Declaration -> lexerset.scan_declaration lexbuf
+ | Content_comment -> lexerset.scan_content_comment lexbuf
+ | Decl_comment -> lexerset.scan_decl_comment lexbuf
+ | Document_comment -> lexerset.scan_document_comment lexbuf
+ | Ignored_section -> assert false
+ (* Ignored_section: only used by method next_ignored_token *)
+ in
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok);
+ (* Find out the number of lines and characters of the last line: *)
+ let n_lines, n_columns =
+ if errors_with_line_numbers then
+ count_lines (Lexing.lexeme lexbuf)
+ else
+ 0, (Lexing.lexeme_end lexbuf - Lexing.lexeme_start lexbuf)
+ in
+ line <- this_line + n_lines;
+ column <- if n_lines = 0 then this_column + n_columns else n_columns;
+ pos <- Lexing.lexeme_end lexbuf;
+ lex_id <- lex_id';
+ (* Throw Ignore and Comment away; Interpret entity references: *)
+ (* NOTE: Of course, references to general entities are not allowed
+ * everywhere; parameter references, too. This is already done by the
+ * lexers, i.e. &name; and %name; are recognized only where they
+ * are allowed.
+ *)
+
+ (* TODO: last_token is only used to detect Bof. Can be simplified *)
+
+ let at_bof = (last_token = Bof) in
+ last_token <- tok;
+
+ let tok' =
+ match tok with
+
+ (* Entity references: *)
+
+ | ERef n ->
+ let en, extdecl = dtd # gen_entity n in
+ if dtd # standalone_declaration && extdecl then
+ raise
+ (Validation_error
+ ("Reference to entity `" ^ n ^
+ "' violates standalone declaration"));
+ en # set_debugging_mode debug;
+ en # open_entity true lex_id;
+ self # manager # push_entity en;
+ en # next_token;
+ | PERef n ->
+ let en = dtd # par_entity n in
+ en # set_debugging_mode debug;
+ en # open_entity force_parameter_entity_parsing lex_id;
+ self # manager # push_entity en;
+ en # next_token;
+
+ (* Convert LineEnd to CharData *)
+ | LineEnd s ->
+ if normalize_newline then
+ CharData "\n"
+ else
+ CharData s
+
+ (* Also normalize CDATA sections *)
+ | Cdata value as cd ->
+ if normalize_newline then
+ Cdata(normalize_line_separators lexerset value)
+ else
+ cd
+
+ (* If there are CRLF sequences in a PI value, normalize them, too *)
+ | PI(name,value) as pi ->
+ if normalize_newline then
+ PI(name, normalize_line_separators lexerset value)
+ else
+ pi
+
+ (* Attribute values: If they are already normalized, they are turned
+ * into Attval_nl_normalized. This is detected by other code.
+ *)
+ | Attval value as av ->
+ if normalize_newline then
+ av
+ else
+ Attval_nl_normalized value
+
+ (* Another CRLF normalization case: Unparsed_string *)
+ | Unparsed_string value as ustr ->
+ if normalize_newline then
+ Unparsed_string(normalize_line_separators lexerset value)
+ else
+ ustr
+
+ (* These tokens require that the entity_id parameter is set: *)
+ | Doctype _ -> Doctype (self :> entity_id)
+ | Doctype_rangle _ ->Doctype_rangle(self :> entity_id)
+ | Dtd_begin _ -> Dtd_begin (self :> entity_id)
+ | Dtd_end _ -> Dtd_end (self :> entity_id)
+ | Decl_element _ -> Decl_element (self :> entity_id)
+ | Decl_attlist _ -> Decl_attlist (self :> entity_id)
+ | Decl_entity _ -> Decl_entity (self :> entity_id)
+ | Decl_notation _ ->Decl_notation (self :> entity_id)
+ | Decl_rangle _ -> Decl_rangle (self :> entity_id)
+ | Lparen _ -> Lparen (self :> entity_id)
+ | Rparen _ -> Rparen (self :> entity_id)
+ | RparenPlus _ -> RparenPlus (self :> entity_id)
+ | RparenStar _ -> RparenStar (self :> entity_id)
+ | RparenQmark _ -> RparenQmark (self :> entity_id)
+ | Conditional_begin _ -> Conditional_begin (self :> entity_id)
+ | Conditional_body _ -> Conditional_body (self :> entity_id)
+ | Conditional_end _ -> Conditional_end (self :> entity_id)
+ | Tag_beg (n,_) -> Tag_beg (n, (self :> entity_id))
+ | Tag_end (n,_) -> Tag_end (n, (self :> entity_id))
+
+ (* End of file: *)
+
+ | Eof ->
+ if debug then begin
+ prerr_endline ("- Entity " ^ name ^ " # handle_eof");
+ let tok = self # handle_eof in
+ prerr_endline ("- Entity " ^ name ^ " # handle_eof: returns " ^ string_of_tok tok);
+ tok
+ end
+ else
+ self # handle_eof;
+
+ (* The default case. *)
+
+ | _ ->
+ tok
+
+ in
+ if at_bof & tok <> Eof
+ then begin
+ if debug then
+ prerr_endline ("- Entity " ^ name ^ " # handle_bof");
+ self # handle_bof tok'
+ end
+ else
+ tok'
+ end
+
+
+ (* 'handle_bof' and 'handle_eof' can be used as hooks. Behaviour:
+ *
+ * - Normally, the first token t is read in, and 'handle_bof t' is
+ * called. The return value of this method is what is returned to
+ * the user.
+ * - If the EOF has been reached, 'handle_eof' is called.
+ * - BUT: If the first token is already EOF, 'handle_eof' is called
+ * ONLY, and 'handle_bof' is NOT called.
+ *
+ * The default implementations:
+ * - handle_bof: does nothing
+ * - handle_eof: Pops the previous entity from the stack, switches back
+ * to this entity, and returns the next token of this entity.
+ *)
+
+
+ method private handle_bof first_token =
+   (* Default beginning-of-entity hook: the first token of the entity is
+    * handed back unchanged. Subclasses override this hook to insert
+    * additional tokens in front of the first one.
+    *)
+   first_token
+
+
+ method private handle_eof =
+   (* Default end-of-entity hook: leave this entity and continue with the
+    * entity that referred to it.
+    *
+    * The manager pops the entity stack, this entity is closed, and the
+    * lexer ID returned by close_entity is installed in the entity that
+    * is current afterwards; the result is that entity's next token.
+    * If Stack.Empty is raised while doing so (the outermost entity is
+    * at its end), the result is Eof.
+    *)
+   let manager = self # manager in
+   ( try
+       manager # pop_entity;
+       let resume_lex_id = self # close_entity in
+       let parent = manager # current_entity in
+       parent # set_lex_id resume_lex_id;
+       parent # next_token
+     with
+       | Stack.Empty ->
+           (* The outermost entity is at EOF *)
+           Eof
+   )
+
+
+ method next_ignored_token =
+   (* used after <![ IGNORE *)
+   (* Scans the next token with the scanner for ignored sections, updates
+    * the line/column/pos bookkeeping from the scanned lexeme, and returns
+    * the token. Conditional_begin and Conditional_end are re-created so
+    * that they carry this entity as their entity_id; all other tokens are
+    * returned as scanned. The lexer ID returned by the scanner is not
+    * stored, i.e. lex_id keeps its previous value.
+    *)
+
+   (* TODO: Do we need a test on deferred tokens here? *)
+
+   let line_before = line
+   and column_before = column in
+   let tok, _ = lexerset.scan_ignored_section lexbuf in
+   if debug then
+     prerr_endline ("- Entity " ^ name ^ ": " ^ string_of_tok tok ^ " (Ignored)");
+   (* Line counting is done unconditionally here. *)
+   let n_lines, n_columns = count_lines (Lexing.lexeme lexbuf) in
+   line <- line_before + n_lines;
+   (* If the lexeme contains no line break, the column advances relative
+    * to the old column; otherwise it restarts within the last line.
+    *)
+   column <- (if n_lines = 0 then column_before + n_columns else n_columns);
+   pos <- Lexing.lexeme_end lexbuf;
+   match tok with
+   | Conditional_begin _ -> Conditional_begin (self :> entity_id)
+   | Conditional_end _ -> Conditional_end (self :> entity_id)
+   | _ -> tok
+
+
+ method process_xmldecl pl =
+   (* The parser calls this method just after the XML declaration
+    * <?xml ...?> has been detected.
+    * 'pl': This is the argument of the PI_xml token.
+    *
+    * The declaration is stored (prolog, prolog_pairs), optionally checked
+    * with check_text_xml_pi (if check_text_declaration is set), and the
+    * value of its encoding attribute is passed to set_encoding. If
+    * there is no such attribute, the empty string is passed instead.
+    *)
+   if debug then
+     prerr_endline ("- Entity " ^ name ^ " # process_xmldecl");
+   prolog <- Some pl;
+   prolog_pairs <- decode_xml_pi pl;
+   if check_text_declaration then
+     check_text_xml_pi prolog_pairs;
+   (* Only the lookup is guarded: a Not_found raised by set_encoding itself
+    * must not be mistaken for a missing encoding attribute.
+    *)
+   let declared_encoding =
+     try List.assoc "encoding" prolog_pairs
+     with Not_found -> ""
+   in
+   self # set_encoding declared_encoding
+
+
+ method process_missing_xmldecl =
+   (* Called by the parser when the entity does not begin with an XML
+    * declaration. The empty encoding name is passed to set_encoding.
+    *)
+   if debug then begin
+     prerr_endline ("- Entity " ^ name ^ " # process_missing_xmldecl")
+   end;
+   self # set_encoding ""
+
+
+ (* Methods for NDATA entities only: *)
+ method ext_id : ext_id =
+   (* Not available in this class: fails with an assertion. *)
+   assert false
+ method notation : string =
+   (* Not available in this class: fails with an assertion. *)
+   assert false
+
+ end
+;;
+
+
+class ndata_entity the_name the_ext_id the_notation init_encoding =
+  object (self)
+    (* An NDATA entity can only be asked for its name, its external ID,
+     * its notation, its encoding and its counts_as_external flag.
+     * Every method that deals with reading the entity (manager, lexer,
+     * position, tokens, replacement text, XML declaration) raises
+     * Validation_error "Invalid reference to NDATA entity <name>".
+     *)
+
+    val mutable name = the_name
+    val mutable ext_id = the_ext_id
+    val mutable notation = the_notation
+    val encoding = (init_encoding : rep_encoding)
+
+    val mutable counts_as_external = false
+
+
+    (* --- Properties that can be queried --- *)
+
+    method name : string = name
+    method ext_id : ext_id = ext_id
+    method notation : string = notation
+
+    method is_ndata = true
+
+    method encoding = encoding
+
+    method counts_as_external = counts_as_external
+      (* Whether the entity counts as external (for the standalone check). *)
+
+    method set_counts_as_external =
+      counts_as_external <- true
+
+    method xml_declaration : (string*string) list option = None
+
+    method set_debugging_mode (_:bool) = ()
+
+    method private set_encoding (_:string) =
+      (* Never expected to be called. *)
+      assert false
+
+
+    (* --- Everything else is an invalid reference --- *)
+
+    method set_manager (m : < current_entity : entity;
+                              pop_entity : unit;
+                              push_entity : entity -> unit >) : unit =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method set_lex_id (id : lexers) : unit =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method line : int =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method column : int =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method full_name : string =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method open_entity (_:bool) (_:lexers) : unit =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method close_entity : lexers =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method replacement_text : (string * bool) =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method lexbuf : Lexing.lexbuf =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method next_token : token =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method next_ignored_token : token =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method process_xmldecl (pl:prolog_token list) : unit =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+    method process_missing_xmldecl : unit =
+      raise (Validation_error ("Invalid reference to NDATA entity " ^ name))
+
+  end
+;;
+
+
+class external_entity the_resolver the_dtd the_name the_warner the_ext_id
+                      the_p_special_empty_entities
+                      init_errors_with_line_numbers
+                      init_encoding
+  =
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+              init_encoding
+      as super
+
+    (* An external entity obtains the lexbuf it reads from by asking a
+     * resolver to open its external ID.
+     * A Begin_entity token is inserted before the first token, and an
+     * End_entity token before Eof. This is done regardless of the
+     * argument 'force_parsing' of the method 'open_entity'.
+     *
+     * 'the_p_special_empty_entities': if true, the Begin_entity/End_entity
+     *    brace is left out if the entity is otherwise empty.
+     *)
+
+    val resolver = (the_resolver : resolver)
+    val ext_id = (the_ext_id : ext_id)
+
+    val p_special_empty_entities = (the_p_special_empty_entities : bool)
+
+    val mutable resolver_is_open = false
+      (* True between a successful open_in and the matching close_in of
+       * the resolver. Also used to detect recursive references.
+       *)
+
+    val mutable state = At_beginning
+      (* Progress of the Begin_entity/End_entity insertion; see
+       * handle_bof and handle_eof.
+       *)
+
+    initializer
+      counts_as_external <- true
+
+
+    method private set_encoding e =
+      (* Forwards the encoding name to the resolver, which must be open. *)
+      assert resolver_is_open;
+      resolver # change_encoding e
+
+
+    method full_name =
+      (* The entity name followed by a description of the external ID. *)
+      let id_description =
+        match ext_id with
+        | System s      -> " = SYSTEM \"" ^ s ^ "\""
+        | Public (p, s) -> " = PUBLIC \"" ^ p ^ "\" \"" ^ s ^ "\""
+        | Anonymous     -> " = ANONYMOUS"
+      in
+      name ^ id_description
+
+
+    method open_entity force_parsing init_lex_id =
+      (* Opens the resolver for the external ID and resets the reading
+       * state of the entity.
+       *
+       * Note that external entities are always parsed, i.e. Begin_entity
+       * and End_entity tokens embrace the inner tokens to force that
+       * the entity is only called where the syntax allows it. The
+       * argument 'force_parsing' is therefore not consulted.
+       *
+       * Raises Validation_error if the entity is already open, and Error
+       * if the resolver signals Not_competent or Not_resolvable.
+       *)
+      if resolver_is_open then
+        raise (Validation_error ("Recursive reference to entity `" ^ name ^ "'"));
+      let source =
+        try
+          resolver # open_in ext_id
+        with
+        | Pxp_reader.Not_competent ->
+            raise (Error ("No input method available for this external entity: " ^
+                          self # full_name))
+        | Pxp_reader.Not_resolvable Not_found ->
+            raise (Error ("Unable to open the external entity: " ^
+                          self # full_name))
+        | Pxp_reader.Not_resolvable e ->
+            raise (Error ("Unable to open the external entity: " ^
+                          self # full_name ^ "; reason: " ^
+                          string_of_exn e))
+      in
+      resolver_is_open <- true;
+      lexbuf <- source;
+      lex_id <- init_lex_id;
+      prolog <- None;
+      state <- At_beginning;
+      last_token <- Bof;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      normalize_newline <- true
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty.
+       * Begin_entity is returned first; the real first token is deferred.
+       *)
+      deferred_token <- Some [ tok ];
+      state <- Inserted_begin_entity;
+      Begin_entity
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state, p_special_empty_entities with
+      | At_beginning, true ->
+          (* Empty stream, and empty entities get no brace:
+           * continue immediately with the next token.
+           *)
+          state <- At_end;
+          super # handle_eof
+      | At_beginning, false ->
+          (* Empty stream: insert Begin_entity / End_entity.
+           * After these two tokens have been processed, the lexer
+           * is called again, and it will return another Eof.
+           *)
+          deferred_token <- Some [ End_entity ];
+          state <- At_end;
+          Begin_entity
+      | Inserted_begin_entity, _ ->
+          (* Begin_entity was inserted, so insert End_entity, too. *)
+          state <- At_end;
+          End_entity
+      | At_end, _ ->
+          (* Continue with the next token: *)
+          super # handle_eof
+
+
+    method close_entity =
+      (* Closes the resolver and returns the current lexer ID.
+       * Fails if the entity is not open.
+       *)
+      if not resolver_is_open then
+        failwith ("External entity " ^ name ^ " not open");
+      resolver # close_in;
+      resolver_is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      (* Return the replacement text of the entity, paired with 'true'.
+       * The method used for this is more or less the same as for internal
+       * entities; i.e. character and parameter entities are resolved
+       * immediately. In addition to that, external entities may begin
+       * with an "xml" processing instruction which is considered not to
+       * be part of the replacement text.
+       *
+       * Raises Validation_error if the entity is already open.
+       *)
+      if resolver_is_open then
+        raise (Validation_error ("Recursive reference to entity `" ^ name ^ "'"));
+      let source = resolver # open_in ext_id in
+      resolver_is_open <- true;
+      lexbuf <- source;
+      prolog <- None;
+      (* arbitrary:    lex_id <- init_lex_id; *)
+      state <- At_beginning;
+      last_token <- Bof;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      (* First check if the first token of 'source' is <?xml...?> *)
+      ( match lexerset.scan_only_xml_decl source with
+        | PI_xml pl ->
+            self # process_xmldecl pl
+        | Eof ->
+            (* This only means that the first token was not <?xml...?>;
+             * the "Eof" token represents the empty string.
+             *)
+            self # process_missing_xmldecl
+        | _ ->
+            (* Must not happen. *)
+            assert false
+      );
+      (* Then create the replacement text. *)
+      let rec expand () =
+        match lexerset.scan_dtd_string lexbuf with
+        | ERef n ->
+            (* General entity references are kept as references. *)
+            "&" ^ n ^ ";" ^ expand ()
+        | CRef (-1) | CRef (-2) | CRef (-3) ->
+            (* The three special negative codes all yield a single LF. *)
+            "\n" ^ expand ()
+        | CRef k ->
+            character encoding warner k ^ expand ()
+        | CharData x ->
+            x ^ expand ()
+        | PERef n ->
+            let en = dtd # par_entity n in
+            let (x, _) = en # replacement_text in
+            x ^ expand ()
+        | Eof ->
+            ""
+        | _ ->
+            assert false
+      in
+      let rtext = expand () in
+      resolver # close_in;
+      resolver_is_open <- false;
+      (rtext, true)
+        (* TODO:
+         * - The replaced text is not parsed [VALIDATION WEAKNESS]
+         *)
+  end
+;;
+
+
+class document_entity the_resolver the_dtd the_name the_warner the_ext_id
+                      init_errors_with_line_numbers
+                      init_encoding
+  =
+  object (self)
+    inherit external_entity
+              the_resolver the_dtd the_name the_warner the_ext_id
+              false    (* the_p_special_empty_entities *)
+              init_errors_with_line_numbers
+              init_encoding
+
+    (* A document entity is an external entity that forces that internal
+     * parameter entities are properly nested
+     * (force_parameter_entity_parsing), that does not check its XML
+     * declaration as a text declaration (check_text_declaration), and
+     * that never counts as external.
+     * According to the original author's note, a document entity also
+     * does not allow conditional sections; nothing in this class itself
+     * enforces that.
+     *)
+
+    initializer
+      check_text_declaration <- false;
+      force_parameter_entity_parsing <- true
+
+    method counts_as_external = false
+      (* Document entities count never as external! *)
+  end
+;;
+
+
+class internal_entity the_dtd the_name the_warner the_literal_value
+                      the_p_internal_subset init_errors_with_line_numbers
+                      init_is_parameter_entity
+                      init_encoding
+  =
+  (* An internal entity uses a "literal entity value" as character source.
+   * This value is first expanded and preprocessed, i.e. character and
+   * parameter references are expanded. This happens once, when the object
+   * is created; the result is the replacement text.
+   *
+   * 'the_p_internal_subset': indicates that the entity is declared in the
+   *   internal subset. Such entity declarations are not allowed to contain
+   *   references to parameter entities (WF_error is raised).
+   * 'init_is_parameter_entity': whether this is a parameter entity or not.
+   *   For parameter entities the replacement text is surrounded by one
+   *   space on each side when the entity is opened for reading.
+   *)
+
+  object (self)
+    inherit entity
+              the_dtd the_name the_warner init_errors_with_line_numbers
+              init_encoding
+      as super
+
+    val p_internal_subset = the_p_internal_subset
+
+    val mutable replacement_text = ""
+    val mutable contains_external_references = false
+      (* Whether the replacement text of a referenced parameter entity
+       * reported external references.
+       *)
+    val mutable p_parsed_actually = false
+      (* The 'force_parsing' argument of the last open_entity call. *)
+    val mutable is_open = false
+      (* Set while the entity is being expanded or read; used to detect
+       * recursive references.
+       *)
+    val mutable state = At_beginning
+    val mutable is_parameter_entity = init_is_parameter_entity
+
+
+    initializer
+      let lexbuf = Lexing.from_string the_literal_value in
+      let rec scan_and_expand () =
+        match lexerset.scan_dtd_string lexbuf with
+          ERef n       -> "&" ^ n ^ ";" ^ scan_and_expand()
+        | CRef(-1)     -> "\r\n" ^ scan_and_expand()
+        | CRef(-2)     -> "\r" ^ scan_and_expand()
+        | CRef(-3)     -> "\n" ^ scan_and_expand()
+        | CRef k       -> character encoding warner k ^ scan_and_expand()
+        | CharData x   -> x ^ scan_and_expand()
+        | PERef n      ->
+            if p_internal_subset then
+              raise(WF_error("Restriction of the internal subset: parameter entity not allowed here"));
+            let en = dtd # par_entity n in
+            let (x, extref) = en # replacement_text in
+            contains_external_references <-
+              contains_external_references || extref;
+            x ^ scan_and_expand()
+        | Eof          ->
+            ""
+        | _            ->
+            assert false
+      in
+      (* is_open is set during the expansion so that a parameter entity
+       * referring back to this entity runs into the recursion check of
+       * method replacement_text.
+       *)
+      is_open <- true;
+      replacement_text <- scan_and_expand();
+      is_open <- false;
+      normalize_newline <- false;
+      counts_as_external <- false;
+
+
+    method process_xmldecl (pl:prolog_token list) =
+      (* Internal entities cannot carry an XML declaration. *)
+      raise(Validation_error("The encoding cannot be changed in internal entities"))
+
+
+    method process_missing_xmldecl =
+      ()
+
+
+    method private set_encoding e =
+      (* Ignored if e = "" *)
+      assert(e = "");
+
+
+    method open_entity force_parsing init_lex_id =
+      (* Prepares the entity for reading its replacement text from the
+       * beginning.
+       * 'force_parsing': whether Begin_entity/End_entity tokens are
+       *   inserted around the tokens of the entity (see handle_bof and
+       *   handle_eof).
+       * 'init_lex_id': the lexer ID to start with.
+       * Raises Validation_error if the entity is already open.
+       *)
+      if is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+
+      p_parsed_actually <- force_parsing;
+      lexbuf <- Lexing.from_string
+                  (if is_parameter_entity then
+                     (" " ^ replacement_text ^ " ")
+                   else
+                     replacement_text);
+      prolog <- None;
+      lex_id <- init_lex_id;
+      state <- At_beginning;
+      is_open <- true;
+      line <- 1;
+      column <- 0;
+      pos <- 0;
+      (* Must be Bof: next_token calls handle_bof only if the previous
+       * token was Bof. With any other value Begin_entity would not be
+       * inserted before the first token of the entity.
+       *)
+      last_token <- Bof;
+
+
+    method private handle_bof tok =
+      (* This hook is only called if the stream is not empty. *)
+      if p_parsed_actually then begin
+        (* Return Begin_entity first; the real first token is deferred. *)
+        deferred_token <- Some [ tok ];
+        state <- Inserted_begin_entity;
+        Begin_entity
+      end
+      else begin
+        (* No brace is inserted: handle_eof must simply continue. *)
+        state <- At_end;
+        tok
+      end
+
+
+    method private handle_eof =
+      (* This hook is called if the end of the stream is reached *)
+      match state with
+        At_beginning ->
+          (* This is only possible if the stream is empty. *)
+          if p_parsed_actually then begin
+            (* Insert Begin_entity / End_entity *)
+            deferred_token <- Some [ End_entity ];
+            state <- At_end;
+            Begin_entity;
+            (* After these two tokens have been processed, the lexer
+             * is called again, and it will return another Eof.
+             *)
+          end
+          else begin
+            (* Continue immediately with the next token *)
+            state <- At_end;
+            super # handle_eof
+          end
+      | Inserted_begin_entity ->
+          (* Insert End_entity, too. *)
+          state <- At_end;
+          End_entity;
+      | At_end ->
+          (* Continue with the next token: *)
+          super # handle_eof
+
+
+    method close_entity =
+      (* Marks the entity as closed and returns the current lexer ID.
+       * Fails if the entity is not open.
+       *)
+      if not is_open then
+        failwith ("Internal entity " ^ name ^ " not open");
+      is_open <- false;
+      lex_id
+
+
+    method replacement_text =
+      (* Returns the precomputed replacement text together with the flag
+       * contains_external_references.
+       * Raises Validation_error if the entity is currently open.
+       *)
+      if is_open then
+        raise(Validation_error("Recursive reference to entity `" ^ name ^ "'"));
+      replacement_text, contains_external_references
+  end
+;;
+
+(**********************************************************************)
+
+(* An 'entity_manager' is a stack of entities, where the topmost entity
+ * is the currently active entity, the second entity is the entity that
+ * referred to the active entity, and so on.
+ *
+ * The entity_manager can communicate with the currently active entity.
+ *
+ * The entity_manager provides an interface for the parser; the functions
+ * returning the current token and the next token are exported.
+ *)
+
+class entity_manager (init_entity : entity) =
+  object (self)
+    (* entity_stack holds the entities that referred to the current one,
+     * each paired with its lazily computed full name.
+     *)
+    val mutable entity_stack = Stack.create()
+    val mutable current_entity = init_entity
+    val mutable current_entity's_full_name = lazy (init_entity # full_name)
+
+    (* Reference to the function delivering the next token of the
+     * current entity; updated on every push and pop.
+     *)
+    val mutable yy_get_next_ref = ref (fun () -> assert false)
+
+    initializer
+      init_entity # set_manager
+        (self :> < current_entity : entity;
+                   pop_entity : unit;
+                   push_entity : entity -> unit >);
+      yy_get_next_ref := (fun () -> init_entity # next_token)
+
+
+    method push_entity e =
+      (* Makes 'e' the current entity; the previously current entity is
+       * saved on the stack.
+       *)
+      e # set_manager
+        (self :> < current_entity : entity;
+                   pop_entity : unit;
+                   push_entity : entity -> unit >);
+      Stack.push (current_entity, current_entity's_full_name) entity_stack;
+      current_entity <- e;
+      current_entity's_full_name <- lazy (e # full_name);
+      yy_get_next_ref := (fun () -> e # next_token)
+
+
+    method pop_entity =
+      (* Makes the entity on top of the stack the current entity again.
+       * May raise Stack.Empty
+       *)
+      let previous, previous_name = Stack.pop entity_stack in
+      current_entity <- previous;
+      current_entity's_full_name <- previous_name;
+      yy_get_next_ref := (fun () -> previous # next_token)
+
+
+    method position_string =
+      (* Gets a string describing the position of the last token;
+       * includes an entity backtrace
+       *)
+      let b = Buffer.create 200 in
+      let add_frame intro line_label entity_name e =
+        Buffer.add_string b
+          (intro ^ entity_name
+           ^ line_label ^ string_of_int (e # line)
+           ^ ", position " ^ string_of_int (e # column)
+           ^ ":\n")
+      in
+      add_frame "In entity " ", at line "
+        (current_entity # full_name) current_entity;
+      Stack.iter
+        (fun (e, e_name) ->
+           add_frame "Called from entity " ", line " (Lazy.force e_name) e)
+        entity_stack;
+      Buffer.contents b
+
+
+    method position =
+      (* Returns the triple (full_name, line, column) of the last token *)
+      ( Lazy.force current_entity's_full_name,
+        current_entity # line,
+        current_entity # column )
+
+
+    method current_entity_counts_as_external =
+      (* Whether the current entity counts as external to the main
+       * document for the purpose of stand-alone checks. This is the case
+       * if the current entity or any entity on the stack counts as
+       * external.
+       *)
+      (* TODO: improve performance *)
+      let found = ref (current_entity # counts_as_external) in
+      Stack.iter
+        (fun (e, _) -> if e # counts_as_external then found := true)
+        entity_stack;
+      !found
+
+
+    method current_entity = current_entity
+
+    method yy_get_next_ref = yy_get_next_ref
+
+  end
+;;
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/14 13:55:00 gerd
+ * Cosmetic changes.
+ *
+ * Revision 1.5 2000/07/09 17:51:50 gerd
+ * Entities return now the beginning of a token as its
+ * position.
+ * New method 'position' for entity_manager.
+ *
+ * Revision 1.4 2000/07/09 01:05:04 gerd
+ * Exported methods 'ext_id' and 'notation' anyway.
+ *
+ * Revision 1.3 2000/07/08 16:28:05 gerd
+ * Updated: Exception 'Not_resolvable' is taken into account.
+ *
+ * Revision 1.2 2000/07/04 22:12:47 gerd
+ * Update: Case ext_id = Anonymous.
+ * Update: Handling of the exception Not_competent when reading
+ * from a resolver.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_entity.ml:
+ *
+ * Revision 1.27 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.26 2000/05/28 17:24:55 gerd
+ * Bugfixes.
+ *
+ * Revision 1.25 2000/05/27 19:23:32 gerd
+ * The entities store whether they count as external with
+ * respect to the standalone check: New methods counts_as_external
+ * and set_counts_as_external.
+ * The entity manager can find out whether the current
+ * entity counts as external: method current_entity_counts_as_external.
+ *
+ * Revision 1.24 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.23 2000/05/14 21:51:24 gerd
+ * Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.22 2000/05/14 17:50:54 gerd
+ * Updates because of changes in the token type.
+ *
+ * Revision 1.21 2000/05/09 00:02:44 gerd
+ * Conditional sections are now recognized by the parser.
+ * There seem some open questions; see the TODO comments!
+ *
+ * Revision 1.20 2000/05/08 21:58:22 gerd
+ * Introduced entity_manager as communication object between
+ * the parser and the currently active entity.
+ * New hooks handle_bof and handle_eof.
+ * Removed "delegated entities". The entity manager contains
+ * the stack of open entities.
+ * Changed the way Begin_entity and End_entity are inserted.
+ * This is now done by handle_bof and handle_eof.
+ * The XML declaration is no longer detected by the entity.
+ * This is now done by the parser.
+ *
+ * Revision 1.19 2000/05/01 15:18:44 gerd
+ * Improved CRLF handling in the replacement text of entities.
+ * Changed one error message.
+ *
+ * Revision 1.18 2000/04/30 18:18:39 gerd
+ * Bugfixes: The conversion of CR and CRLF to LF is now hopefully
+ * done right. The new variable "normalize_newline" indicates whether
+ * normalization must happen for that type of entity. The normalization
+ * is actually carried out separately for every token that needs it.
+ *
+ * Revision 1.17 2000/03/13 23:42:38 gerd
+ * Removed the resolver classes, and put them into their
+ * own module (Markup_reader).
+ *
+ * Revision 1.16 2000/02/22 01:06:58 gerd
+ * Bugfix: Resolvers are properly re-initialized. This bug caused
+ * that entities could not be referenced twice in the same document.
+ *
+ * Revision 1.15 2000/01/20 20:54:11 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.14 2000/01/08 18:59:03 gerd
+ * Corrected the string resolver.
+ *
+ * Revision 1.13 1999/09/01 22:58:23 gerd
+ * Method warn_not_latin1 raises Illegal_character if the character
+ * does not match the Char production.
+ * External entities that are not document entities check if the
+ * <?xml...?> declaration at the beginning matches the TextDecl production.
+ * Method xml_declaration has type ... list option, not ... list.
+ * Tag_beg and Tag_end now carry an entity_id with them.
+ * The code to check empty entities has changed. That the Begin_entity/
+ * End_entity pair is not to be added must be explicitly turned on. See the
+ * description of empty entity handling in design.txt.
+ * In internal subsets entity declarations are not allowed to refer
+ * to parameter entities. The internal_entity class can do this now.
+ * The p_parsed parameter of internal_entity has gone. It was simply
+ * superfluous.
+ *
+ * Revision 1.12 1999/09/01 16:24:13 gerd
+ * The method replacement_text returns the text as described for
+ * "included in literal". The former behaviour has been dropped to include
+ * a leading and a trailing space character for parameter entities.
+ * Bugfix: When general entities are included, they are always parsed.
+ *
+ * Revision 1.11 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.10 1999/08/19 01:06:41 gerd
+ * Improved error messages: external entities print their
+ * ext id, too
+ *
+ * Revision 1.9 1999/08/15 20:35:48 gerd
+ * Improved error messages.
+ * Before the tokens Plus, Star, Qmark space is not allowed any longer.
+ * Detection of recursive entity references is a bit cleaner.
+ *
+ * Revision 1.8 1999/08/15 15:33:44 gerd
+ * Revised whitespace checking: At certain positions there must be
+ * white space. These checks cannot be part of the lexer, as %entity; counts
+ * as white space. They cannot be part of the yacc parser because one look-ahead
+ * token would not suffice if we did that. So these checks must be done by the
+ * entity layer. Luckily, the rules are simple: There are simply a number of
+ * token pairs between which white space must occur independently of where
+ * these tokens have been found. Two variables, "space_seen", and "last_token"
+ * have been added in order to check these rules.
+ *
+ * Revision 1.7 1999/08/15 00:41:06 gerd
+ * The [ token of conditional sections is now allowed to occur
+ * in a different entity.
+ *
+ * Revision 1.6 1999/08/15 00:29:02 gerd
+ * The method "attlist_replacement_text" has gone. There is now a
+ * more general "replacement_text" method that computes the replacement
+ * text for both internal and external entities. Additionally, this method
+ * returns whether references to external entities have been resolved;
+ * this is checked in the cases where formerly "attlist_replacement_text"
+ * was used as it is not allowed everywhere.
+ * Entities have a new slot "need_spaces" that indicates that the
+ * next token must be white space or a parameter reference. The problem
+ * was that "<!ATTLIST%e;" is legal because when including parameter
+ * entities white space is added implicitly. Formerly, the white space
+ * was expected by the underlying lexer; now the lexer does not check
+ * anymore that "<!ATTLIST" is followed by white space because the lexer
+ * cannot handle parameter references. Because of this, the check on
+ * white space must be done by the entity.
+ *
+ * Revision 1.5 1999/08/14 22:57:19 gerd
+ * It is allowed that external entities are empty because the
+ * empty string is well-parsed for both declarations and contents. Empty
+ * entities can be referenced anywhere because the references are replaced
+ * by nothing. Because of this, the Begin_entity...End_entity brace is only
+ * inserted if the entity is non-empty. (Otherwise references to empty
+ * entities would not be allowed anywhere.)
+ * As a consequence, the grammar has been changed such that a
+ * single Eof is equivalent to Begin_entity,End_entity without content.
+ *
+ * Revision 1.4 1999/08/14 22:11:19 gerd
+ * Several objects have now a "warner" as argument which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Previously, the resolvers had features in order to warn about
+ * such characters; this has been removed.
+ * UTF-8 streams can be read even if they contain characters
+ * that cannot be represented by 16 bits.
+ * The buffering used in the resolvers is now solved in a
+ * cleaner way; the number of characters that are expected to be read
+ * from a source can be limited. This removes a bug with UTF-16 streams
+ * that previously lead to wrong exceptions; and the buffering is more
+ * efficient, too.
+ *
+ * Revision 1.3 1999/08/11 14:58:53 gerd
+ * Some more names for encodings are allowed, such as "utf8" instead
+ * of the standard name "UTF-8".
+ * 'resolve_as_file' interprets relative file names as relative to
+ * the "parent" resolver.
+ *
+ * Revision 1.2 1999/08/10 21:35:07 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(* Identifies one of the scanners. Every constructor stands for one
+ * scanning mode.
+ *)
+type lexers =
+  | Document
+  | Document_type
+  | Content
+  | Within_tag
+  | Declaration
+  | Content_comment
+  | Decl_comment
+  | Document_comment
+  | Ignored_section
+
+
+(* The tokens carried by a PI_xml token, i.e. the pieces of an
+ * <?xml ...?> declaration.
+ *)
+type prolog_token =
+  | Pro_name of string        (* a name *)
+  | Pro_eq                    (* "=" *)
+  | Pro_string of string      (* "..." or '...' *)
+  | Pro_eof                   (* end of the declaration *)
+
+
+type entity_id = < >
+ (* The object type without any methods. An object coerced to this type
+ * exposes no properties, but it can still be compared whether two such
+ * objects are the same. Several tokens below carry an entity_id.
+ *)
+
+(* The tokens exchanged between the scanners, the entity layer and the
+ * parser. Tokens with an entity_id argument identify the entity they
+ * were read from.
+ *)
+type token =
+ | Begin_entity (* Beginning of entity *)
+ | End_entity (* End of entity *)
+ | Comment_begin (* <!-- *)
+ | Comment_material of string (* within a comment *)
+ | Comment_end (* --> *)
+ | Ignore (* ignored whitespace *)
+ | Eq (* = *)
+ | Rangle (* > as tag delimiter *)
+ | Rangle_empty (* /> as tag delimiter *)
+ | Percent (* % followed by space in declaration *)
+ | Plus (* + in declaration *)
+ | Star (* * in declaration *)
+ | Bar (* | in declaration *)
+ | Comma (* , in declaration *)
+ | Qmark (* ? in declaration *)
+ | Pcdata (* #PCDATA in declaration *)
+ | Required (* #REQUIRED in declaration *)
+ | Implied (* #IMPLIED in declaration *)
+ | Fixed (* #FIXED in declaration *)
+ | Bof (* A marker for 'beginning of file' *)
+ | Eof (* End of file *)
+ | Conditional_begin of entity_id (* <![ in declaration *)
+ | Conditional_body of entity_id (* [ in declaration *)
+ | Conditional_end of entity_id (* ]]> in declaration *)
+ | Doctype of entity_id (* <!DOCTYPE *)
+ | Doctype_rangle of entity_id (* > as DOCTYPE delimiter *)
+ | Dtd_begin of entity_id (* '[' after DOCTYPE *)
+ | Dtd_end of entity_id (* ']' *)
+ | Decl_element of entity_id (* <!ELEMENT *)
+ | Decl_attlist of entity_id (* <!ATTLIST *)
+ | Decl_entity of entity_id (* <!ENTITY *)
+ | Decl_notation of entity_id (* <!NOTATION *)
+ | Decl_rangle of entity_id (* > *)
+ | Lparen of entity_id (* ( in declaration *)
+ | Rparen of entity_id (* ) in declaration *)
+ | RparenPlus of entity_id (* )+ in declaration *)
+ | RparenStar of entity_id (* )* in declaration *)
+ | RparenQmark of entity_id (* )? in declaration *)
+
+ | Tag_beg of (string*entity_id) (* <name *)
+ | Tag_end of (string*entity_id) (* </name *)
+
+ | PI of (string*string) (* <?name ... ?> *)
+ | PI_xml of (prolog_token list) (* <?xml ...?> *)
+ | Cdata of string (* <![CDATA[...]]> *)
+ | CRef of int (* &#digits; *)
+ | ERef of string (* &name; *)
+ | PERef of string (* %name; *)
+ | CharData of string (* any characters not otherwise matching *)
+ (* LineEnd: a line end; the entity layer converts it to CharData *)
+ | LineEnd of string
+ | Name of string (* name *)
+ | Nametoken of string (* nmtoken but not name *)
+ | Attval of string (* attribute value; may contain entity refs *)
+ (* Attval_nl_normalized: an attribute value whose line ends are already
+ * normalized; produced by the entity layer from Attval
+ *)
+ | Attval_nl_normalized of string
+ | Unparsed_string of string (* "data" or 'data' *)
+
+
+(**********************************************************************)
+(* debugging *)
+
+(* string_of_tok: maps a token to the name of its constructor. Arguments of
+ * the constructor are not included in the result. The cases are listed in
+ * the order of the type definition, so a missing constructor is easy to
+ * spot; there is intentionally no catch-all case.
+ *)
+let string_of_tok = function
+  (* tokens without argument *)
+  | Begin_entity           -> "Begin_entity"
+  | End_entity             -> "End_entity"
+  | Comment_begin          -> "Comment_begin"
+  | Comment_material _     -> "Comment_material"
+  | Comment_end            -> "Comment_end"
+  | Ignore                 -> "Ignore"
+  | Eq                     -> "Eq"
+  | Rangle                 -> "Rangle"
+  | Rangle_empty           -> "Rangle_empty"
+  | Percent                -> "Percent"
+  | Plus                   -> "Plus"
+  | Star                   -> "Star"
+  | Bar                    -> "Bar"
+  | Comma                  -> "Comma"
+  | Qmark                  -> "Qmark"
+  | Pcdata                 -> "Pcdata"
+  | Required               -> "Required"
+  | Implied                -> "Implied"
+  | Fixed                  -> "Fixed"
+  | Bof                    -> "Bof"
+  | Eof                    -> "Eof"
+  (* tokens carrying an entity ID *)
+  | Conditional_begin _    -> "Conditional_begin"
+  | Conditional_body _     -> "Conditional_body"
+  | Conditional_end _      -> "Conditional_end"
+  | Doctype _              -> "Doctype"
+  | Doctype_rangle _       -> "Doctype_rangle"
+  | Dtd_begin _            -> "Dtd_begin"
+  | Dtd_end _              -> "Dtd_end"
+  | Decl_element _         -> "Decl_element"
+  | Decl_attlist _         -> "Decl_attlist"
+  | Decl_entity _          -> "Decl_entity"
+  | Decl_notation _        -> "Decl_notation"
+  | Decl_rangle _          -> "Decl_rangle"
+  | Lparen _               -> "Lparen"
+  | Rparen _               -> "Rparen"
+  | RparenPlus _           -> "RparenPlus"
+  | RparenStar _           -> "RparenStar"
+  | RparenQmark _          -> "RparenQmark"
+  | Tag_beg _              -> "Tag_beg"
+  | Tag_end _              -> "Tag_end"
+  (* tokens carrying text or a derived value *)
+  | PI _                   -> "PI"
+  | PI_xml _               -> "PI_xml"
+  | Cdata _                -> "Cdata"
+  | CRef _                 -> "CRef"
+  | ERef _                 -> "ERef"
+  | PERef _                -> "PERef"
+  | CharData _             -> "CharData"
+  | LineEnd _              -> "LineEnd"
+  | Name _                 -> "Name"
+  | Nametoken _            -> "Nametoken"
+  | Attval _               -> "Attval"
+  | Attval_nl_normalized _ -> "Attval_nl_normalized"
+  | Unparsed_string _      -> "Unparsed_string"
+
+
+(* lexer_set: a bundle of scanner functions that belong together.
+ * lex_encoding is the internal (representation) encoding tag of the set.
+ * The functions returning (token * lexers) correspond one by one to the
+ * constructors of the type lexers; the remaining functions return only a
+ * token (or a prolog_token in the case of scan_xml_pi).
+ * This definition must be kept identical to the one in the interface file.
+ *)
+type lexer_set =
+ { lex_encoding : Pxp_types.rep_encoding;
+ scan_document : Lexing.lexbuf -> (token * lexers);
+ scan_content : Lexing.lexbuf -> (token * lexers);
+ scan_within_tag : Lexing.lexbuf -> (token * lexers);
+ scan_document_type : Lexing.lexbuf -> (token * lexers);
+ scan_declaration : Lexing.lexbuf -> (token * lexers);
+ scan_content_comment : Lexing.lexbuf -> (token * lexers);
+ scan_decl_comment : Lexing.lexbuf -> (token * lexers);
+ scan_document_comment: Lexing.lexbuf -> (token * lexers);
+ scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+ scan_xml_pi : Lexing.lexbuf -> prolog_token;
+ scan_dtd_string : Lexing.lexbuf -> token;
+ scan_content_string : Lexing.lexbuf -> token;
+ scan_name_string : Lexing.lexbuf -> token;
+ scan_only_xml_decl : Lexing.lexbuf -> token;
+ scan_for_crlf : Lexing.lexbuf -> token;
+ }
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:14:31 gerd
+ * Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.ml:
+ *
+ * Revision 1.6 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.5 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.4 2000/05/14 17:45:36 gerd
+ * Bugfix.
+ *
+ * Revision 1.3 2000/05/14 17:35:12 gerd
+ * Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2 2000/05/08 21:59:06 gerd
+ * New token Bof (beginning of file).
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.4 of markup_lexer_types_shadow.ml
+ *
+ * Revision 1.4 2000/04/30 18:19:04 gerd
+ * Added new tokens.
+ *
+ * Revision 1.3 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2 1999/08/10 21:35:08 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(* The lexical contexts of the scanner. There is one constructor for each
+ * scan_* function of lexer_set that returns a (token * lexers) pair, e.g.
+ * Document for scan_document and Ignored_section for scan_ignored_section.
+ * NOTE(review): presumably the lexers component of a scan result names the
+ * context in which the next token has to be scanned -- confirm against the
+ * generated lexer modules.
+ *)
+type lexers =
+ Document
+ | Document_type
+ | Content
+ | Within_tag
+ | Declaration
+ | Content_comment
+ | Decl_comment
+ | Document_comment
+ | Ignored_section
+
+
+(* The tokens found inside an XML declaration. The function scan_xml_pi of
+ * lexer_set returns them one at a time, and the token PI_xml carries a
+ * whole list of them.
+ *)
+type prolog_token =
+ Pro_name of string (* presumably a setting name such as version -- TODO confirm *)
+ | Pro_eq (* "=" *)
+ | Pro_string of string (* "..." or '...' *)
+ | Pro_eof (* presumably the end of the declaration -- TODO confirm *)
+
+type entity_id = < >
+ (* The class without properties; but you can still compare if two objects
+ * are the same.
+ * Values of this type are carried by the tokens that open or close a
+ * construct (e.g. Decl_element and Decl_rangle). According to the revision
+ * history at the end of this file, the lexer emits a dummy ID, the entity
+ * layer replaces it by the ID of the current entity, and the parser checks
+ * that corresponding tokens carry the same ID (proper nesting of parameter
+ * entities).
+ *)
+
+(* The tokens produced by the scanner functions of lexer_set.
+ * The constructors come in three groups: constant tokens, tokens that carry
+ * an entity_id (used to check that a construct begins and ends in the same
+ * entity), and tokens that carry the matched text or a value derived from it.
+ * This definition must be kept identical to the one in the implementation.
+ *)
+type token =
+ | Begin_entity (* Beginning of entity *)
+ | End_entity (* End of entity *)
+ | Comment_begin (* <!-- *)
+ | Comment_material of string (* within a comment *)
+ | Comment_end (* --> *)
+ | Ignore (* ignored whitespace *)
+ | Eq (* = *)
+ | Rangle (* > as tag delimiter *)
+ | Rangle_empty (* /> as tag delimiter *)
+ | Percent (* % followed by space in declaration *)
+ | Plus (* + in declaration *)
+ | Star (* * in declaration *)
+ | Bar (* | in declaration *)
+ | Comma (* , in declaration *)
+ | Qmark (* ? in declaration *)
+ | Pcdata (* #PCDATA in declaration *)
+ | Required (* #REQUIRED in declaration *)
+ | Implied (* #IMPLIED in declaration *)
+ | Fixed (* #FIXED in declaration *)
+ | Bof (* A marker for 'beginning of file' *)
+ | Eof (* End of file *)
+ | Conditional_begin of entity_id (* <![ in declaration *)
+ | Conditional_body of entity_id (* [ in declaration *)
+ | Conditional_end of entity_id (* ]]> in declaration *)
+ | Doctype of entity_id (* <!DOCTYPE *)
+ | Doctype_rangle of entity_id (* > as DOCTYPE delimiter *)
+ | Dtd_begin of entity_id (* '[' after DOCTYPE *)
+ | Dtd_end of entity_id (* ']' *)
+ | Decl_element of entity_id (* <!ELEMENT *)
+ | Decl_attlist of entity_id (* <!ATTLIST *)
+ | Decl_entity of entity_id (* <!ENTITY *)
+ | Decl_notation of entity_id (* <!NOTATION *)
+ | Decl_rangle of entity_id (* > *)
+ | Lparen of entity_id (* ( in declaration *)
+ | Rparen of entity_id (* ) in declaration *)
+ | RparenPlus of entity_id (* )+ in declaration *)
+ | RparenStar of entity_id (* )* in declaration *)
+ | RparenQmark of entity_id (* )? in declaration *)
+
+ | Tag_beg of (string*entity_id) (* <name *)
+ | Tag_end of (string*entity_id) (* </name *)
+
+ | PI of (string*string) (* <?name ... ?> *)
+ | PI_xml of (prolog_token list) (* <?xml ...?> *)
+ | Cdata of string (* <![CDATA[...]]> *)
+ | CRef of int (* &#digits; *)
+ | ERef of string (* &name; *)
+ | PERef of string (* %name; *)
+ | CharData of string (* any characters not otherwise matching *)
+ | LineEnd of string (* presumably a line terminator -- TODO confirm *)
+ | Name of string (* name *)
+ | Nametoken of string (* nmtoken but not name *)
+ | Attval of string (* attribute value; may contain entity refs *)
+ | Attval_nl_normalized of string (* presumably an Attval whose line ends are already normalized -- TODO confirm *)
+ | Unparsed_string of string (* "data" or 'data' *)
+
+
+val string_of_tok : token -> string
+ (* Returns the name of the constructor of the token, without the arguments
+ * of the constructor (e.g. "Tag_beg"). Intended for debugging output.
+ *)
+
+
+type lexer_set =
+ { lex_encoding : Pxp_types.rep_encoding;
+ scan_document : Lexing.lexbuf -> (token * lexers);
+ scan_content : Lexing.lexbuf -> (token * lexers);
+ scan_within_tag : Lexing.lexbuf -> (token * lexers);
+ scan_document_type : Lexing.lexbuf -> (token * lexers);
+ scan_declaration : Lexing.lexbuf -> (token * lexers);
+ scan_content_comment : Lexing.lexbuf -> (token * lexers);
+ scan_decl_comment : Lexing.lexbuf -> (token * lexers);
+ scan_document_comment: Lexing.lexbuf -> (token * lexers);
+ scan_ignored_section : Lexing.lexbuf -> (token * lexers);
+ scan_xml_pi : Lexing.lexbuf -> prolog_token;
+ scan_dtd_string : Lexing.lexbuf -> token;
+ scan_content_string : Lexing.lexbuf -> token;
+ scan_name_string : Lexing.lexbuf -> token;
+ scan_only_xml_decl : Lexing.lexbuf -> token;
+ scan_for_crlf : Lexing.lexbuf -> token;
+ }
+
+(* lexer_set: Every internal encoding has its own set of lexer functions.
+ * lex_encoding is the internal (representation) encoding tag of the set.
+ * The functions returning (token * lexers) correspond one by one to the
+ * constructors of the type lexers; the remaining functions return only a
+ * token (or a prolog_token in the case of scan_xml_pi).
+ *)
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/18 20:14:31 gerd
+ * Comment -> Comment_begin, Comment_material, Comment_end.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_lexer_types.mli:
+ *
+ * Revision 1.5 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.4 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.3 2000/05/14 17:35:12 gerd
+ * Conditional_begin, _end, and _body have an entity_id.
+ *
+ * Revision 1.2 2000/05/08 21:59:17 gerd
+ * New token Bof (beginning of file).
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * DERIVED FROM REVISION 1.3 of markup_lexer_types_shadow.mli
+ *
+ * Revision 1.3 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.2 1999/08/10 21:35:09 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:51 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+(* The lexer set for the internal encoding ISO-8859-1. Every field simply
+ * refers to the scanner function of the same name in one of the
+ * Pxp_lex_*_iso88591 modules. Note that scan_ignored_section is taken from
+ * Pxp_lex_name_string_iso88591, and that the comment scanners, scan_xml_pi,
+ * scan_only_xml_decl, and scan_for_crlf are taken from
+ * Pxp_lex_misc_iso88591.
+ *)
+let lexer_set_iso88591 =
+ { lex_encoding = `Enc_iso88591;
+ scan_document = Pxp_lex_document_iso88591.scan_document;
+ scan_content = Pxp_lex_content_iso88591.scan_content;
+ scan_within_tag = Pxp_lex_within_tag_iso88591.scan_within_tag;
+ scan_document_type = Pxp_lex_document_type_iso88591.
+ scan_document_type;
+ scan_declaration = Pxp_lex_declaration_iso88591.scan_declaration;
+ scan_content_comment = Pxp_lex_misc_iso88591.scan_content_comment;
+ scan_decl_comment = Pxp_lex_misc_iso88591.scan_decl_comment;
+ scan_document_comment = Pxp_lex_misc_iso88591.scan_document_comment;
+ scan_ignored_section = Pxp_lex_name_string_iso88591.
+ scan_ignored_section;
+ scan_xml_pi = Pxp_lex_misc_iso88591.scan_xml_pi;
+ scan_dtd_string = Pxp_lex_dtd_string_iso88591.scan_dtd_string;
+ scan_content_string = Pxp_lex_content_string_iso88591.
+ scan_content_string;
+ scan_name_string = Pxp_lex_name_string_iso88591.scan_name_string;
+ scan_only_xml_decl = Pxp_lex_misc_iso88591.scan_only_xml_decl;
+ scan_for_crlf = Pxp_lex_misc_iso88591.scan_for_crlf;
+ }
+;;
+
+
+(* The lexer set for the internal encoding UTF-8, or None as long as
+ * init_utf8 has not been called. The revision history of this file says
+ * that the set is registered from outside so that programs can be linked
+ * without UTF-8 support.
+ *)
+let lexer_set_utf8 = ref None
+;;
+
+
+(* init_utf8: registers the passed lexer set as the one to be returned by
+ * get_lexer_set for the internal encoding UTF-8. A later call replaces the
+ * set registered by an earlier call.
+ *)
+let init_utf8 utf8_lexers =
+  lexer_set_utf8 := Some utf8_lexers
+;;
+
+
+(* get_lexer_set: returns the lexer set for the passed internal encoding.
+ * - `Enc_iso88591: the statically linked set lexer_set_iso88591
+ * - `Enc_utf8: the set registered by init_utf8; Failure is raised if no
+ *   set has been registered yet
+ * - any other tag: Failure is raised
+ *)
+let get_lexer_set = function
+  | `Enc_iso88591 ->
+      lexer_set_iso88591
+  | `Enc_utf8 ->
+      begin match !lexer_set_utf8 with
+        | Some utf8_lexers ->
+            utf8_lexers
+        | None ->
+            failwith "Pxp_lexers: UTF-8 lexers not initialized"
+      end
+  | _ ->
+      failwith "Pxp_lexers: This type of internal encoding is not supported"
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/23 00:09:44 gerd
+ * The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+open Pxp_types
+open Pxp_lexer_types
+
+val get_lexer_set : rep_encoding -> lexer_set
+ (* Return the set of lexer functions that is able to handle the passed
+ * encoding.
+ * Raises Failure if the encoding is UTF-8 and no UTF-8 lexer set has been
+ * registered with init_utf8, or if the encoding is not supported at all.
+ *)
+
+val init_utf8 : lexer_set -> unit
+ (* Internally used. Registers the lexer set that get_lexer_set returns for
+ * the UTF-8 encoding; according to the revision history below this is done
+ * by the module Pxp_utf8.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/23 00:09:44 gerd
+ * The UTF-8 lexer set is no longer initialized here. It is done
+ * in the new module Pxp_utf8. Reason: You can link without UTF-8 support.
+ *
+ * Revision 1.1 2000/05/20 20:30:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+exception Not_competent;;
+ (* Raised by a resolver (or by the user functions passed to a resolver)
+ * that is not able to open the entity with the requested ID. The class
+ * combine catches this exception and tries the next resolver.
+ *)
+exception Not_resolvable of exn;;
+ (* Raised by resolve_read_url_channel if the URL of the entity cannot be
+ * computed or opened. The argument is the reason: Neturl.Malformed_URL,
+ * or Not_found if Not_competent was raised after the URL was computed.
+ *)
+
+(* resolver: the interface of the objects that open external entities.
+ * The comments on the methods describe what the implementations in this
+ * file do.
+ *)
+class type resolver =
+ object
+ method init_rep_encoding : rep_encoding -> unit
+ (* Sets the internal (representation) encoding. Must be called before
+ * open_in (resolve_general # open_in asserts this).
+ *)
+ method init_warner : collect_warnings -> unit
+ (* Sets the object receiving warnings. Must be called before open_in,
+ * too.
+ *)
+ method rep_encoding : rep_encoding
+ (* Returns the encoding passed to init_rep_encoding *)
+ method open_in : ext_id -> Lexing.lexbuf
+ (* Opens the entity with the passed ID and returns a lexbuf reading
+ * from it. Raises Not_competent if the resolver cannot open this ID.
+ *)
+ method close_in : unit
+ (* Closes the entity opened by open_in *)
+ method close_all : unit
+ (* Invokes close_in on all clones created by this resolver *)
+ method change_encoding : string -> unit
+ (* Announces the encoding name found in the XML declaration. The empty
+ * string means that the autodetected encoding is kept.
+ *)
+ method clone : resolver
+ (* Creates a new resolver with the same configuration, initialized with
+ * the same representation encoding and warner.
+ *)
+ end
+;;
+
+
+(* resolve_general: the common base class of the resolvers reading from
+ * byte sources. Subclasses must define
+ *   - init_in:     opens the byte source for an ID (may raise Not_competent,
+ *                  and may already set 'encoding'/'encoding_requested')
+ *   - next_string: next_string s ofs len stores up to len bytes into s at
+ *                  position ofs, and returns the number of bytes; 0 means
+ *                  end of stream
+ *   - close_in:    closes the byte source
+ * The method open_in returns a lexbuf that delivers the bytes of the source
+ * recoded from the external encoding to the internal encoding.
+ *
+ * Instance variables used by subclasses: internal_encoding, encoding,
+ * encoding_requested, warner, clones.
+ *)
+class virtual resolve_general
+ =
+  object (self)
+    val mutable internal_encoding = `Enc_utf8
+
+    val mutable encoding = `Enc_utf8
+      (* The (currently assumed) external encoding *)
+    val mutable encoding_requested = false
+      (* Whether 'encoding' is definite, i.e. set by init_in or confirmed
+       * by change_encoding
+       *)
+
+    val mutable warner = new drop_warnings
+
+    val mutable enc_initialized = false
+    val mutable wrn_initialized = false
+
+    val mutable clones = []
+
+    method init_rep_encoding e =
+      internal_encoding <- e;
+      enc_initialized <- true
+
+    method init_warner w =
+      warner <- w;
+      wrn_initialized <- true
+
+    method rep_encoding = (internal_encoding :> rep_encoding)
+
+(*
+    method clone =
+      ( {< encoding = `Enc_utf8;
+           encoding_requested = false;
+        >}
+        : # resolver :> resolver )
+*)
+
+    method private warn (k:int) =
+      (* Called if a character not representable has been found.
+       * k is the character code.
+       * Code points in the accepted ranges only cause a warning; any
+       * other value raises WF_error.
+       *)
+      if k < 0xd800 || (k >= 0xe000 && k <= 0xfffd) ||
+         (k >= 0x10000 && k <= 0x10ffff) then begin
+        warner # warn ("Code point cannot be represented: " ^ string_of_int k)
+      end
+      else
+        raise (WF_error("Code point " ^ string_of_int k ^
+                        " outside the accepted range of code points"))
+
+
+    method private autodetect s =
+      (* s must be at least 4 bytes long. The slot 'encoding' is
+       * set to:
+       * "UTF-16-BE": UTF-16/UCS-2 encoding big endian
+       * "UTF-16-LE": UTF-16/UCS-2 encoding little endian
+       * "UTF-8":     UTF-8 encoding
+       * Only the first two bytes (the byte order mark) are inspected.
+       *)
+      if String.length s < 4 then
+        encoding <- `Enc_utf8
+      else if String.sub s 0 2 = "\254\255" then
+        encoding <- `Enc_utf16
+          (* Note: Netconversion.recode will detect the big endianness, too *)
+      else if String.sub s 0 2 = "\255\254" then
+        encoding <- `Enc_utf16
+          (* Note: Netconversion.recode will detect the little endianness, too *)
+      else
+        encoding <- `Enc_utf8
+
+
+    method private virtual next_string : string -> int -> int -> int
+    method private virtual init_in : ext_id -> unit
+    method virtual close_in : unit
+
+    method close_all =
+      List.iter (fun r -> r # close_in) clones
+
+    method open_in xid =
+      assert(enc_initialized && wrn_initialized);
+
+      encoding <- `Enc_utf8;
+      encoding_requested <- false;
+      self # init_in xid;         (* may raise Not_competent *)
+      (* init_in: may already set 'encoding' *)
+
+      (* 'buffer' is the staging area for bytes in the external encoding;
+       * the first !buffer_len bytes are valid.
+       *)
+      let buffer_max = 512 in
+      let buffer = String.make buffer_max ' ' in
+      let buffer_len = ref 0 in
+      let buffer_end = ref false in
+      let fillup () =
+        (* Try to append more bytes to the buffer unless the end of the
+         * stream has already been seen
+         *)
+        if not !buffer_end && !buffer_len < buffer_max then begin
+          let l =
+            self # next_string buffer !buffer_len (buffer_max - !buffer_len) in
+          if l = 0 then
+            buffer_end := true
+          else begin
+            buffer_len := !buffer_len + l
+          end
+        end
+      in
+      let consume n =
+        (* Remove the first n bytes from the buffer *)
+        let l = !buffer_len - n in
+        String.blit buffer n buffer 0 l;
+        buffer_len := l
+      in
+
+      fillup();
+      (* The whole buffer is passed; the part not yet filled contains
+       * spaces, so it cannot be mistaken for a byte order mark.
+       *)
+      if not encoding_requested then self # autodetect buffer;
+
+      Lexing.from_function
+        (fun s n ->
+           (* TODO: if encoding = internal_encoding, it is possible to
+            * avoid copying buffer to s because s can be directly used
+            * as buffer.
+            *)
+
+           fillup();
+           if !buffer_len = 0 then
+             0
+           else begin
+             let m_in  = !buffer_len in
+             (* As long as the encoding is not definite, only one character
+              * is converted per call, such that change_encoding can still
+              * take effect for the following characters.
+              *)
+             let m_max = if encoding_requested then n else 1 in
+             let n_in, n_out, encoding' =
+               if encoding = (internal_encoding : rep_encoding :> encoding) &&
+                  encoding_requested
+               then begin
+                 (* Special case encoding = internal_encoding: plain copy.
+                  * Never copy more than the n bytes the lexer asked for;
+                  * the rest remains in the buffer for the next call.
+                  *)
+                 let m = min m_in n in
+                 String.blit buffer 0 s 0 m;
+                 m, m, encoding
+               end
+               else
+                 Netconversion.recode
+                   ~in_enc:encoding
+                   ~in_buf:buffer
+                   ~in_pos:0
+                   ~in_len:m_in
+                   ~out_enc:(internal_encoding : rep_encoding :> encoding)
+                   ~out_buf:s
+                   ~out_pos:0
+                   ~out_len:n
+                   ~max_chars:m_max
+                   ~subst:(fun k -> self # warn k; "")
+             in
+             if n_in = 0 then
+               (* An incomplete character at the end of the stream: *)
+               raise Netconversion.Malformed_code;
+               (* failwith "Badly encoded character"; *)
+             encoding <- encoding';
+             consume n_in;
+             assert(n_out <> 0);
+             n_out
+           end)
+
+    method change_encoding enc =
+      (* enc is the encoding name of the XML declaration, or "" if there
+       * is none. The call has only an effect as long as the encoding is
+       * not yet definite.
+       *)
+      if not encoding_requested then begin
+        if enc <> "" then begin
+          match Netconversion.encoding_of_string enc with
+              `Enc_utf16 ->
+                (* The declaration does not say anything about the byte
+                 * order, so it must only agree with the detected one
+                 *)
+                (match encoding with
+                     (`Enc_utf16_le | `Enc_utf16_be) -> ()
+                   | `Enc_utf16 -> assert false
+                   | _ ->
+                       raise(WF_error "Encoding of data stream and encoding declaration mismatch")
+                )
+            | e ->
+                encoding <- e
+        end;
+        (* else: the autodetected encoding counts *)
+        encoding_requested <- true
+      end
+  end
+;;
+
+
+(* resolve_read_any_channel: reads the entity from the in_channel that the
+ * function channel_of_id returns for the requested ID. This function
+ * returns the channel and optionally the encoding of the channel; it may
+ * raise Not_competent. If auto_close is true (the default), close_in closes
+ * the channel.
+ *)
+class resolve_read_any_channel ?(auto_close=true) ~channel_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = channel_of_id
+    val mutable current_channel = None
+    val auto_close = auto_close
+
+    method private init_in (id:ext_id) =
+      (* It is an error to open the resolver while it is already open *)
+      begin match current_channel with
+        | Some _ -> failwith "Pxp_reader.resolve_read_any_channel # init_in"
+        | None   -> ()
+      end;
+      let (ch, enc_opt) = f_open id in      (* may raise Not_competent *)
+      begin match enc_opt with
+        | Some enc ->
+            (* The encoding is known in advance and thus definite *)
+            encoding <- enc;
+            encoding_requested <- true
+        | None ->
+            ()
+      end;
+      current_channel <- Some ch
+
+    method private next_string s ofs len =
+      match current_channel with
+        | Some ch -> input ch s ofs len
+        | None    -> failwith "Pxp_reader.resolve_read_any_channel # next_string"
+
+    method close_in =
+      match current_channel with
+        | Some ch ->
+            begin if auto_close then close_in ch end;
+            current_channel <- None
+        | None ->
+            ()
+
+    method clone =
+      let copy =
+        new resolve_read_any_channel ?auto_close:(Some auto_close) f_open in
+      copy # init_rep_encoding internal_encoding;
+      copy # init_warner warner;
+      clones <- copy :: clones;
+      (copy :> resolver)
+
+  end
+;;
+
+
+(* resolve_read_this_channel1: reads the entity from the fixed channel ch.
+ * If id is passed, only this ID is accepted; other IDs are rejected with
+ * Not_competent. If fixenc is passed, it is reported as the encoding of
+ * the channel. is_stale is the initial value of the flag that makes the
+ * resolver reject every ID.
+ *)
+class resolve_read_this_channel1 is_stale ?id ?fixenc ?auto_close ch =
+
+  (* The inherited class needs the function returning the channel before
+   * self exists. Because of this a reference is passed which is filled
+   * by the initializer.
+   *)
+  let getchannel = ref (fun _ -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel
+              ?auto_close:auto_close
+              (fun xid -> !getchannel xid)
+              as super
+
+    val mutable is_stale = is_stale
+      (* The channel can only be read once. To avoid that the channel
+       * is opened several times, the flag 'is_stale' is set after the
+       * first time.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixch = ch
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      (* Reject all IDs but the bound one (if any) *)
+      begin match fixid with
+        | Some bound_xid when xid <> bound_xid -> raise Not_competent
+        | Some _ | None -> ()
+      end;
+      ch, fixenc
+
+    method private init_in (id:ext_id) =
+      if is_stale then raise Not_competent;
+      super # init_in id;
+      is_stale <- true
+
+    method close_in =
+      (* NOTE(review): the channel itself is not closed here, regardless
+       * of auto_close -- confirm that this is intended.
+       *)
+      current_channel <- None
+
+    method clone =
+      let copy =
+        new resolve_read_this_channel1
+          is_stale
+          ?id:fixid ?fixenc:fixenc ?auto_close:(Some auto_close) fixch
+      in
+      copy # init_rep_encoding internal_encoding;
+      copy # init_warner warner;
+      clones <- copy :: clones;
+      (copy :> resolver)
+
+  end
+;;
+
+
+(* resolve_read_this_channel: the class resolve_read_this_channel1 with the
+ * first argument is_stale fixed to false, i.e. the new resolver has not yet
+ * read the channel.
+ *)
+class resolve_read_this_channel =
+ resolve_read_this_channel1 false
+;;
+
+
+(* resolve_read_any_string: reads the entity from the string that the
+ * function string_of_id returns for the requested ID. This function
+ * returns the string and optionally the encoding of the string; it may
+ * raise Not_competent.
+ *)
+class resolve_read_any_string ~string_of_id =
+  object (self)
+    inherit resolve_general as super
+
+    val f_open = string_of_id
+    val mutable current_string = None
+    val mutable current_pos    = 0
+      (* The position of the next byte of current_string to deliver *)
+
+    method private init_in (id:ext_id) =
+      (* It is an error to open the resolver while it is already open *)
+      begin match current_string with
+        | Some _ -> failwith "Pxp_reader.resolve_read_any_string # init_in"
+        | None   -> ()
+      end;
+      let (s, enc_opt) = f_open id in       (* may raise Not_competent *)
+      begin match enc_opt with
+        | Some enc ->
+            (* The encoding is known in advance and thus definite *)
+            encoding <- enc;
+            encoding_requested <- true
+        | None ->
+            ()
+      end;
+      current_string <- Some s;
+      current_pos <- 0
+
+    method private next_string s ofs len =
+      match current_string with
+        | Some str ->
+            (* Deliver at most the rest of the string *)
+            let remaining = String.length str - current_pos in
+            let count = min len remaining in
+            String.blit str current_pos s ofs count;
+            current_pos <- current_pos + count;
+            count
+        | None ->
+            failwith "Pxp_reader.resolve_read_any_string # next_string"
+
+    method close_in =
+      (* Forgetting the string is all that has to be done *)
+      current_string <- None
+
+    method clone =
+      let copy = new resolve_read_any_string f_open in
+      copy # init_rep_encoding internal_encoding;
+      copy # init_warner warner;
+      clones <- copy :: clones;
+      (copy :> resolver)
+  end
+;;
+
+
+(* resolve_read_this_string1: reads the entity from the fixed string str.
+ * If id is passed, only this ID is accepted; other IDs are rejected with
+ * Not_competent. If fixenc is passed, it is reported as the encoding of
+ * the string. A resolver created with is_stale = true rejects every ID.
+ *)
+class resolve_read_this_string1 is_stale ?id ?fixenc str =
+
+  (* The inherited class needs the function returning the string before
+   * self exists. Because of this a reference is passed which is filled
+   * by the initializer.
+   *)
+  let getstring = ref (fun _ -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_string (fun xid -> !getstring xid) as super
+
+    val is_stale = is_stale
+      (* For some reasons, it is not allowed to open a clone of the resolver
+       * a second time when the original resolver is already open.
+       *)
+
+    val fixid = id
+    val fixenc = fixenc
+    val fixstr = str
+
+    initializer
+      getstring := self # getstring
+
+    method private getstring xid =
+      (* Reject all IDs but the bound one (if any) *)
+      begin match fixid with
+        | Some bound_xid when xid <> bound_xid -> raise Not_competent
+        | Some _ | None -> ()
+      end;
+      fixstr, fixenc
+
+
+    method private init_in (id:ext_id) =
+      if is_stale then raise Not_competent;
+      super # init_in id
+
+    method clone =
+      (* The clone is stale if this resolver is stale or currently open *)
+      let clone_is_stale = is_stale || current_string <> None in
+      let copy =
+        new resolve_read_this_string1
+          clone_is_stale
+          ?id:fixid ?fixenc:fixenc fixstr
+      in
+      copy # init_rep_encoding internal_encoding;
+      copy # init_warner warner;
+      clones <- copy :: clones;
+      (copy :> resolver)
+  end
+;;
+
+
+(* resolve_read_this_string: the class resolve_read_this_string1 with the
+ * first argument is_stale fixed to false, i.e. the new resolver accepts
+ * to be opened.
+ *)
+class resolve_read_this_string =
+ resolve_read_this_string1 false
+;;
+
+
+(* resolve_read_url_channel: maps the ID to a URL, and the URL to a channel.
+ *   - url_of_id:      returns the (possibly relative) URL for an ID; may
+ *                     raise Not_competent
+ *   - channel_of_url: returns the channel and optionally the encoding for
+ *                     an absolute URL; may raise Not_competent
+ *   - base_url:       relative URLs are interpreted relative to this URL
+ * A clone uses the URL of the entity opened by this resolver as its base
+ * URL.
+ *)
+class resolve_read_url_channel
+  ?(base_url = Neturl.null_url)
+  ?auto_close
+  ~url_of_id
+  ~channel_of_url
+
+  : resolver
+  =
+
+  (* The inherited class needs the function returning the channel before
+   * self exists. Because of this a reference is passed which is filled
+   * by the initializer.
+   *)
+  let getchannel = ref (fun _ -> assert false) in
+
+  object (self)
+    inherit resolve_read_any_channel
+              ?auto_close:auto_close
+              (fun xid -> !getchannel xid)
+              as super
+
+    val base_url = base_url
+    val mutable own_url = Neturl.null_url
+      (* The absolute URL of the entity opened last *)
+
+    val url_of_id = url_of_id
+    val channel_of_url = channel_of_url
+
+
+    initializer
+      getchannel := self # getchannel
+
+    method private getchannel xid =
+      (* Not_competent raised by url_of_id is passed through to the caller.
+       * Once the relative URL is known, every failure is reported as
+       * Not_resolvable.
+       *)
+      let rel_url = url_of_id xid in
+      try
+        let abs_url = Neturl.apply_relative_url base_url rel_url in
+           (* may raise Malformed_URL *)
+        if Neturl.url_provides ~scheme:true abs_url then begin
+          own_url <- abs_url;
+          (* FIXME: Copy 'abs_url' ? *)
+          channel_of_url abs_url          (* may raise Not_competent *)
+        end
+        else
+          (* Without scheme 'abs_url' is not really absolute *)
+          raise Not_competent
+      with
+        | Neturl.Malformed_URL ->
+            raise (Not_resolvable Neturl.Malformed_URL)
+        | Not_competent ->
+            raise (Not_resolvable Not_found)
+
+    method clone =
+      let copy =
+        new resolve_read_url_channel
+          ?base_url:(Some own_url)
+          ?auto_close:(Some auto_close)
+          ~url_of_id:url_of_id
+          ~channel_of_url:channel_of_url
+      in
+      copy # init_rep_encoding internal_encoding;
+      copy # init_warner warner;
+      clones <- copy :: clones;
+      (copy :> resolve_read_url_channel)
+  end
+;;
+
+
+(* spec: says for the class resolve_as_file whether a part of the URL (the
+ * scheme prefix or the host) is not recognized, allowed, or required. The
+ * tags are translated to Neturl.Url_part_not_recognized,
+ * Neturl.Url_part_allowed, and Neturl.Url_part_required, respectively.
+ *)
+type spec = [ `Not_recognized | `Allowed | `Required ]
+
+(* resolve_as_file: interprets the system identifier of the ID as file URL,
+ * and opens the file.
+ *   - file_prefix:     whether the scheme prefix is recognized in the
+ *                      identifier (default: `Allowed)
+ *   - host_prefix:     whether a host is recognized in the identifier
+ *                      (default: `Allowed)
+ *   - system_encoding: the encoding of file names; the path of the URL is
+ *                      recoded from UTF-8 to this encoding (default: UTF-8)
+ *   - url_of_id, channel_of_url: optional functions that are tried first;
+ *                      if they raise Not_competent the built-in file
+ *                      handling is used
+ * The base URL for relative identifiers is the current working directory
+ * at the time the object is created.
+ *)
+class resolve_as_file
+  ?(file_prefix = (`Allowed :> spec))
+  ?(host_prefix = (`Allowed :> spec))
+  ?(system_encoding = `Enc_utf8)
+  ?url_of_id:passed_url_of_id
+  ?channel_of_url:passed_channel_of_url
+  ()
+  =
+
+  (* The syntax of the system identifiers: *)
+  let url_syntax =
+    let part_of_spec =
+      function
+        | `Not_recognized -> Neturl.Url_part_not_recognized
+        | `Allowed        -> Neturl.Url_part_allowed
+        | `Required       -> Neturl.Url_part_required
+    in
+    { Neturl.null_url_syntax with
+        Neturl.url_enable_scheme = part_of_spec file_prefix;
+        Neturl.url_enable_host   = part_of_spec host_prefix;
+        Neturl.url_enable_path   = Neturl.Url_part_required;
+        Neturl.url_accepts_8bits = true;
+    }
+  in
+
+  (* The syntax of the base URL: *)
+  let base_url_syntax =
+    { Neturl.null_url_syntax with
+        Neturl.url_enable_scheme = Neturl.Url_part_required;
+        Neturl.url_enable_host   = Neturl.Url_part_allowed;
+        Neturl.url_enable_path   = Neturl.Url_part_required;
+        Neturl.url_accepts_8bits = true;
+    }
+  in
+
+  (* The current working directory as file URL: *)
+  let default_base_url =
+    let cwd_path = Neturl.split_path (Sys.getcwd() ^ "/") in
+    Neturl.make_url
+      ~scheme: "file"
+      ~host:   ""
+      ~path:   cwd_path
+      base_url_syntax
+  in
+
+  let file_url_of_id xid =
+    let file_url_of_sysname sysname =
+      (* By convention, we can assume that sysname is a URL conforming
+       * to RFC 1738 with the exception that it may contain non-ASCII
+       * UTF-8 characters.
+       *)
+      try
+        Neturl.url_of_string url_syntax sysname
+           (* may raise Malformed_URL *)
+      with
+          Neturl.Malformed_URL -> raise Not_competent
+    in
+    let url =
+      match xid with
+        | Anonymous           -> raise Not_competent
+        | Public (_, "")      -> raise Not_competent
+        | Public (_, sysname) -> file_url_of_sysname sysname
+        | System sysname      -> file_url_of_sysname sysname
+    in
+    (* A missing scheme counts as "file", a missing host as "" *)
+    let scheme =
+      try Neturl.url_scheme url with Not_found -> "file" in
+    let host =
+      try Neturl.url_host url with Not_found -> "" in
+
+    (* Only local files are accepted *)
+    if scheme <> "file" then raise Not_competent;
+    begin match host with
+      | "" | "localhost" -> ()
+      | _ -> raise Not_competent
+    end;
+
+    url
+  in
+
+  let channel_of_file_url url =
+    try
+      let path_utf8 =
+        try Neturl.join_path (Neturl.url_path ~encoded:false url)
+        with Not_found -> raise Not_competent
+      in
+
+      let path =
+        Netconversion.recode_string
+          ~in_enc:  `Enc_utf8
+          ~out_enc: system_encoding
+          path_utf8 in
+        (* May raise Bad_character_stream *)
+
+      (* The encoding of the file is not known in advance: *)
+      open_in_bin path, None
+        (* May raise Sys_error *)
+
+    with
+      | Netconversion.Malformed_code -> assert false
+        (* should not happen *)
+
+  in
+
+  (* The passed functions take precedence over the built-in ones, which
+   * are the fallback if the passed functions raise Not_competent
+   *)
+  let url_of_id id =
+    match passed_url_of_id with
+      | Some f ->
+          (try f id with Not_competent -> file_url_of_id id)
+      | None ->
+          file_url_of_id id
+  in
+
+  let channel_of_url url =
+    match passed_channel_of_url with
+      | Some f ->
+          (try f url with Not_competent -> channel_of_file_url url)
+      | None ->
+          channel_of_file_url url
+  in
+
+  resolve_read_url_channel
+    ~base_url:       default_base_url
+    ~auto_close:     true
+    ~url_of_id:      url_of_id
+    ~channel_of_url: channel_of_url
+;;
+
+
+(* Combines several resolvers into one resolver.
+ *
+ * 'open_in' asks the preferred resolver (if any) and then the members of
+ * the list 'rl', in this order. The first resolver that does not raise
+ * Not_competent becomes the active resolver. Any other exception (for
+ * example Not_resolvable) is not caught and ends the search.
+ *
+ * ~prefer: a resolver that is tried before the members of 'rl'. The
+ *          method 'clone' uses this argument to favour the clone of the
+ *          resolver that is currently active.
+ *)
+class combine ?prefer rl =
+  (* The resolvers that must be initialized: the members of 'rl' plus the
+   * preferred resolver unless it is already (physically) a member of 'rl'.
+   *)
+  let contained_resolvers =
+    match prefer with
+      None   -> (rl : resolver list)
+    | Some p -> if List.memq p rl then rl else p :: rl
+  in
+  object (self)
+    val prefered_resolver = prefer
+    val resolvers = (rl : resolver list)
+    val mutable internal_encoding = `Enc_utf8
+    val mutable warner = new drop_warnings
+    val mutable active_resolver = None
+    val mutable clones = []
+
+    (* Passes the internal encoding to every contained resolver, including
+     * the preferred one, and remembers it for later clones.
+     *)
+    method init_rep_encoding enc =
+      List.iter
+        (fun r -> r # init_rep_encoding enc)
+        contained_resolvers;
+      internal_encoding <- enc
+
+    (* Passes the warner to every contained resolver, including the
+     * preferred one, and remembers it for later clones.
+     *)
+    method init_warner w =
+      List.iter
+        (fun r -> r # init_warner w)
+        contained_resolvers;
+      warner <- w
+
+    method rep_encoding = internal_encoding
+      (* CAUTION: This may not be the truth! *)
+
+    (* Opens 'xid' with the first competent resolver. Raises Not_competent
+     * if no resolver is competent, and Failure if this object is already
+     * open.
+     *)
+    method open_in xid =
+      let rec find_competent_resolver candidates =
+        match candidates with
+          r :: candidates' ->
+            begin try
+              r, (r # open_in xid)
+            with
+              Not_competent -> find_competent_resolver candidates'
+            end
+        | [] ->
+            raise Not_competent
+      in
+
+      if active_resolver <> None then failwith "Pxp_reader.combine # open_in";
+      let r, lb =
+        match prefered_resolver with
+          None   -> find_competent_resolver resolvers
+        | Some r -> find_competent_resolver (r :: resolvers)
+      in
+      active_resolver <- Some r;
+      lb
+
+    (* Closes the active resolver; does nothing if this object is closed. *)
+    method close_in =
+      match active_resolver with
+        None   -> ()
+      | Some r -> r # close_in;
+                  active_resolver <- None
+
+    (* Closes this resolver and every clone. The clones are closed with
+     * 'close_all', too, such that the clones of clones are also reached.
+     *)
+    method close_all =
+      self # close_in;
+      List.iter (fun c -> c # close_all) clones
+
+    (* Forwards the encoding to the active resolver. Raises Failure if this
+     * object is not open.
+     *)
+    method change_encoding (enc:string) =
+      match active_resolver with
+        None   -> failwith "Pxp_reader.combine # change_encoding"
+      | Some r -> r # change_encoding enc
+
+    (* Returns a new, closed 'combine' object containing clones of all
+     * resolvers of the list. If this object is open, the clone of the
+     * active resolver becomes the preferred resolver of the result. The
+     * result gets the encoding and the warner of this object, and it is
+     * remembered for 'close_all'.
+     *)
+    method clone =
+      let c =
+        match active_resolver with
+          None   ->
+            new combine ?prefer:None (List.map (fun q -> q # clone) resolvers)
+        | Some r ->
+            let r' = r # clone in
+            new combine
+              ?prefer:(Some r')
+              (List.map
+                 (* Reuse r' so that the active resolver is cloned once *)
+                 (fun q -> if q == r then r' else q # clone)
+                 resolvers)
+      in
+      c # init_rep_encoding internal_encoding;
+      c # init_warner warner;
+      clones <- c :: clones;
+      c
+  end
+
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.9 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.8 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.7 2000/07/09 15:32:01 gerd
+ * Fix in resolve_this_channel, resolve_this_string
+ *
+ * Revision 1.6 2000/07/09 01:05:33 gerd
+ * New methode 'close_all' that closes the clones, too.
+ *
+ * Revision 1.5 2000/07/08 16:24:56 gerd
+ * Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.4 2000/07/06 23:04:46 gerd
+ * Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.3 2000/07/06 21:43:45 gerd
+ * Fix: Public(_,name) is now treated as System(name) if
+ * name is non-empty.
+ *
+ * Revision 1.2 2000/07/04 22:13:30 gerd
+ * Implemented the new API rev. 1.2 of pxp_reader.mli.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.ml:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:44 gerd
+ * Initial revision; this code was formerly part of Markup_entity.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Pxp_types;;
+
+exception Not_competent;;
+ (* Raised by the 'open_in' method of a resolver if the object does not
+ * know how to handle the passed external ID. The class 'combine' reacts
+ * to this exception by trying the next resolver of its list.
+ *)
+
+exception Not_resolvable of exn;;
+ (* Indicates that one resolver was competent, but there was an error
+ * while resolving the external ID. The passed exception explains the
+ * reason.
+ * Not_resolvable(Not_found) serves as indicator for an unknown reason.
+ * In contrast to Not_competent, this exception indicates that 'combine'
+ * should not try the next resolver of its list.
+ *)
+
+
+(* The class type 'resolver' is the official type of all "resolvers".
+ * Resolvers take file names (or better, external identifiers) and
+ * return lexbufs, scanning the file for tokens. Resolvers may be
+ * cloned, and clones can interpret relative file names relative to
+ * their creator.
+ *
+ * Example of the latter:
+ *
+ * Resolver r reads from file:/dir/f1.xml
+ *
+ * <tag>some XML text
+ * &e; -----> Entity e is bound to "subdir/f2.xml"
+ * </tag> Step (1): let r' = "clone of r"
+ * Step (2): open file "subdir/f2.xml"
+ *
+ * r' must still know the directory of the file r is reading, otherwise
+ * it would not be able to resolve "subdir/f2.xml" = "file:/dir/subdir/f2.xml".
+ *
+ * Actually, this example can be coded as:
+ *
+ * let r = new resolve_as_file () in
+ * let lbuf = r # open_in (System "file:/dir/f1.xml") in
+ * ... read from lbuf ...
+ * let r' = r # clone in
+ * let lbuf' = r' # open_in (System "subdir/f2.xml") in
+ * ... read from lbuf' ...
+ * r' # close_in;
+ * ... read from lbuf ...
+ * r # close_in;
+ *)
+
+class type resolver =
+ object
+ (* A resolver can open an input source, and returns this source as
+ * Lexing.lexbuf.
+ *
+ * After creating a resolver, one must invoke the two methods
+ * init_rep_encoding and init_warner to set the internal encoding of
+ * strings and the warner object, respectively. This is normally
+ * done by the parsing functions in Pxp_yacc.
+ * It is not necessary to invoke these two methods for a fresh
+ * clone.
+ *
+ * It is possible that the character encoding of the source and the
+ * internal encoding of the parser are different. To cope with this,
+ * one of the tasks of the resolver is to recode the characters of
+ * the input source into the internal character encoding.
+ *
+ * Note that there are several ways of determining the encoding of the
+ * input: (1) It is possible that the transport protocol (e.g. HTTP)
+ * transmits the encoding, and (2) it is possible to inspect the beginning
+ * of the file, and to analyze:
+ * (2.1) The first two bytes indicate whether UTF-16 is used
+ * (2.2) Otherwise, one can assume that an ASCII-compatible character
+ * set is used. It is now possible to read the XML declaration
+ * <?xml ... encoding="xyz" ...?>. The encoding found here is
+ * to be used.
+ * (2.3) If the XML declaration is missing, the encoding is UTF-8.
+ * The resolver needs only to distinguish between cases (1), (2.1),
+ * and the rest.
+ * The details of analyzing whether (2.2) or (2.3) applies are programmed
+ * elsewhere, and the resolver will be told the result (see below).
+ *
+ * A resolver is like a file: it must be opened before one can work
+ * with it, and it should be closed after all operations on it have been
+ * done. The method 'open_in' is called with the external ID as argument
+ * and it must return the lexbuf reading from the external resource.
+ * The method 'close_in' does not require an argument.
+ *
+ * It is allowed to re-open a resolver after it has been closed. It is
+ * forbidden to open a resolver again while it is open.
+ * It is allowed to close a resolver several times: If 'close_in' is
+ * invoked while the resolver is already closed, nothing happens.
+ *
+ * The method 'open_in' may raise Not_competent to indicate that this
+ * resolver is not able to open this type of IDs.
+ *
+ * The method 'change_encoding' is called from the parser after the
+ * analysis of case (2) has been done; the argument is either the
+ * string name of the encoding, or the empty string to indicate
+ * that no XML declaration was found. It is guaranteed that
+ * 'change_encoding' is invoked after only a few tokens of the
+ * file. The resolver should react as follows:
+ * - If case (1) applies: Ignore the encoding passed to 'change_encoding'.
+ * - If case (2.1) applies: The encoding passed to 'change_encoding' must
+ * be compatible with UTF-16. This should be
+ * checked, and violations should be reported.
+ * - Else: If the passed encoding is "", assume UTF-8.
+ * Otherwise, assume the passed encoding.
+ *
+ * The following rule helps synchronizing the lexbuf with the encoding:
+ * If the resolver has been opened, but 'change_encoding' has not yet
+ * been invoked, the lexbuf contains at most one character (which may
+ * be represented by multiple bytes); i.e. the lexbuf is created by
+ * Lexing.from_function, and the function puts only one character into
+ * the buffer at once.
+ * After 'change_encoding' has been invoked, there is no longer a limit
+ * on the lexbuf size.
+ *
+ * The reason for this rule is that you know exactly the character where
+ * the encoding changes to the encoding passed by 'change_encoding'.
+ *
+ * The method 'clone' may be invoked for open or closed resolvers.
+ * Basically, 'clone' returns a new resolver which is always closed.
+ * If the original resolver is closed, the clone is simply a clone.
+ * If the original resolver is open at the moment of cloning:
+ * If the clone is later opened for a relative system ID (i.e. relative
+ * URL), the clone must interpret this ID relative to the ID of the
+ * original resolver.
+ *)
+ method init_rep_encoding : rep_encoding -> unit
+ (* Sets the internal encoding of strings, i.e. the encoding into which
+ * the resolver recodes the characters of the input source.
+ *)
+ method init_warner : collect_warnings -> unit
+ (* Sets the warner object. *)
+
+ method rep_encoding : rep_encoding
+ (* Returns the internal encoding of strings. *)
+
+ method open_in : ext_id -> Lexing.lexbuf
+ (* Opens the resource denoted by the external ID, and returns the
+ * lexbuf reading from it.
+ * May raise Not_competent if the object does not know how to handle
+ * this ext_id.
+ *)
+ method close_in : unit
+ (* Closes the resolver. Nothing happens if it is already closed. *)
+ method change_encoding : string -> unit
+ (* Called by the parser with the name of the encoding found in the
+ * XML declaration, or with "" if there is no XML declaration.
+ *)
+
+
+ (* Every resolver can be cloned. The clone does not inherit the connection
+ * with the external object, i.e. it is initially closed.
+ *)
+ method clone : resolver
+
+ method close_all : unit
+ (* Closes this resolver and every clone *)
+
+ end
+;;
+
+(* Note: resolve_general is no longer exported. In most cases, the classes
+ * resolve_read_any_channel or resolve_read_any_string are applicable, too,
+ * and much easier to configure.
+ *)
+
+
+(* The next classes are resolvers for concrete input sources. *)
+
+class resolve_read_this_channel :
+ ?id:ext_id -> ?fixenc:encoding -> ?auto_close:bool ->
+ in_channel -> resolver;;
+
+ (* Reads from the passed channel (it may even be a pipe).
+ *
+ * ~id: If this argument is passed to the object, the created resolver
+ * accepts only this ID. Otherwise all IDs are accepted.
+ * Once the resolver has been cloned, it does not accept any ID. This
+ * means that this resolver cannot handle inner references to external
+ * entities. Note that you can combine this resolver with another resolver
+ * that can handle inner references (such as resolve_as_file); see
+ * class 'combine' below.
+ *
+ * ~fixenc: If you pass this argument, the encoding of the channel is
+ * set to the passed value, regardless of any auto-recognition or
+ * any XML declaration.
+ *
+ * ~auto_close: If true (which is the default), the channel is
+ * closed after use. If false, the channel is left open.
+ *)
+
+
+class resolve_read_any_channel :
+ ?auto_close:bool ->
+ channel_of_id:(ext_id -> (in_channel * encoding option)) ->
+ resolver;;
+
+ (* resolve_read_any_channel ~channel_of_id:f_open:
+ * This resolver calls the function f_open to open a new channel for
+ * the passed ext_id. This function must either return the channel and
+ * the encoding, or it must fail with Not_competent.
+ * The function must return None as encoding if the default mechanism to
+ * recognize the encoding should be used. It must return Some e if it is
+ * already known that the encoding of the channel is e.
+ *
+ * ~auto_close: If true (which is the default), the channel is
+ * closed after use. If false, the channel is left open.
+ *)
+
+
+class resolve_read_url_channel :
+ ?base_url:Neturl.url ->
+ ?auto_close:bool ->
+ url_of_id:(ext_id -> Neturl.url) ->
+ channel_of_url:(Neturl.url -> (in_channel * encoding option)) ->
+ resolver;;
+
+ (* resolve_read_url_channel ~url_of_id ~channel_of_url:
+ *
+ * When this resolver gets an ID to read from, it calls the function
+ * ~url_of_id to get the corresponding URL. This URL may be a relative
+ * URL; however, a URL scheme must be used which contains a path.
+ * The resolver converts the URL to an absolute URL if necessary.
+ * The second function, ~channel_of_url, is fed with the absolute URL
+ * as input. This function opens the resource to read from, and returns
+ * the channel and the encoding of the resource.
+ *
+ * Both functions, ~url_of_id and ~channel_of_url, can raise
+ * Not_competent to indicate that the object is not able to read from
+ * the specified resource. However, there is a difference: A Not_competent
+ * from ~url_of_id is left as it is, but a Not_competent from ~channel_of_url
+ * is converted to Not_resolvable. So only ~url_of_id decides which URLs
+ * are accepted by the resolver and which are not.
+ *
+ * The function ~channel_of_url must return None as encoding if the default
+ * mechanism to recognize the encoding should be used. It must return
+ * Some e if it is already known that the encoding of the channel is e.
+ *
+ * ~auto_close: If true (which is the default), the channel is
+ * closed after use. If false, the channel is left open.
+ *
+ * ~base_url: Objects of this class contain a base URL relative to which
+ * relative URLs are interpreted. When creating a new object, you can
+ * specify the base URL by passing it as ~base_url argument. When an
+ * existing object is cloned, the base URL of the clone is the URL of the
+ * original object.
+ *
+ * Note that the term "base URL" has a strict definition in RFC 1808.
+ *)
+
+
+class resolve_read_this_string :
+ ?id:ext_id -> ?fixenc:encoding -> string -> resolver;;
+
+ (* Reads from the passed string.
+ *
+ * ~id: If this argument is passed to the object, the created resolver
+ * accepts only this ID. Otherwise all IDs are accepted.
+ * Once the resolver has been cloned, it does not accept any ID. This
+ * means that this resolver cannot handle inner references to external
+ * entities. Note that you can combine this resolver with another resolver
+ * that can handle inner references (such as resolve_as_file); see
+ * class 'combine' below.
+ *
+ * ~fixenc: If you pass this argument, the encoding of the string is
+ * set to the passed value, regardless of any auto-recognition or
+ * any XML declaration.
+ *)
+
+
+class resolve_read_any_string :
+ string_of_id:(ext_id -> (string * encoding option)) -> resolver;;
+
+ (* resolve_read_any_string ~string_of_id:f_open:
+ * This resolver calls the function f_open to get the string for
+ * the passed ext_id. This function must either return the string and
+ * the encoding, or it must fail with Not_competent.
+ * The function must return None as encoding if the default mechanism to
+ * recognize the encoding should be used. It must return Some e if it is
+ * already known that the encoding of the string is e.
+ *)
+
+
+class resolve_as_file :
+ ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+ ?system_encoding:encoding ->
+ ?url_of_id:(ext_id -> Neturl.url) ->
+ ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+ unit ->
+ resolver;;
+
+ (* Reads from the local file system. Every file name is interpreted as
+ * file name of the local file system, and the referred file is read.
+ *
+ * The full form of a file URL is: file://host/path, where
+ * 'host' specifies the host system where the file identified by 'path'
+ * resides. host = "" or host = "localhost" are accepted; other values
+ * will raise Not_competent. The standard for file URLs is
+ * defined in RFC 1738.
+ *
+ * Option ~file_prefix: Specifies how the "file:" prefix of file names
+ * is handled:
+ * `Not_recognized: The prefix is not recognized.
+ * `Allowed: The prefix is allowed but not required (the default).
+ * `Required: The prefix is required.
+ *
+ * Option ~host_prefix: Specifies how the "//host" phrase of file names
+ * is handled:
+ * `Not_recognized: The phrase is not recognized.
+ * `Allowed: The phrase is allowed but not required (the default).
+ * `Required: The phrase is required.
+ *
+ * Option ~system_encoding: Specifies the encoding of file names of
+ * the local file system. Default: UTF-8.
+ *
+ * Options ~url_of_id, ~channel_of_url: Not for the end user!
+ *
+ * The final unit argument is required to create the object, i.e.
+ * write: new resolve_as_file ()
+ *)
+
+
+class combine : ?prefer:resolver -> resolver list -> resolver;;
+
+ (* Combines several resolver objects. If a concrete entity with an
+ * ext_id is to be opened, the combined resolver tries the contained
+ * resolvers in turn until a resolver accepts opening the entity
+ * (i.e. it does not raise Not_competent on open_in). If the ~prefer
+ * argument is passed, this resolver is tried first, i.e. before the
+ * resolvers of the list.
+ *
+ * Clones: If the 'clone' method is invoked before 'open_in', all contained
+ * resolvers are cloned and again combined. If the 'clone' method is
+ * invoked after 'open_in' (i.e. while the resolver is open), the
+ * contained resolvers are cloned and combined, too, but the clone of the
+ * active resolver is preferred: it is tried first when the clone is
+ * opened.
+ *)
+
+(* EXAMPLES OF RESOLVERS:
+ *
+ * let r1 = new resolve_as_file ()
+ * - r1 can open all local files
+ *
+ * let r2 = new resolve_read_this_channel
+ * ~id:(System "file:/dir/f.xml")
+ * (open_in "/dir/f.xml")
+ * - r2 can only read /dir/f.xml of the local file system. If this file
+ * contains references to other files, r2 will fail
+ *
+ * let r3 = new combine [ r2; r1 ]
+ * - r3 reads /dir/f.xml of the local file system by calling r2, and all
+ * other files by calling r1
+ *)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.5 2000/07/09 01:05:33 gerd
+ * New methode 'close_all' that closes the clones, too.
+ *
+ * Revision 1.4 2000/07/08 16:24:56 gerd
+ * Introduced the exception 'Not_resolvable' to indicate that
+ * 'combine' should not try the next resolver of the list.
+ *
+ * Revision 1.3 2000/07/06 23:04:46 gerd
+ * Quick fix for 'combine': The active resolver is "prefered",
+ * but the other resolvers are also used.
+ *
+ * Revision 1.2 2000/07/04 22:06:49 gerd
+ * MAJOR CHANGE: Complete redesign of the reader classes.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_reader.mli:
+ *
+ * Revision 1.3 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/03/13 23:41:54 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+(* External identifiers: System(sysid) is a system identifier (file name or
+ * URL); Public(id,sysid) is a public identifier 'id' with a fallback
+ * system identifier 'sysid', which may be the empty string; Anonymous
+ * means that the external ID is not known.
+ *)
+type ext_id =
+ System of string
+ | Public of (string * string)
+ | Anonymous
+
+
+(* Describes where a DTD comes from. *)
+type dtd_id =
+ External of ext_id
+ (* DTD is completely external *)
+ | Derived of ext_id
+ (* DTD is derived from an external DTD *)
+ | Internal
+ (* DTD is completely internal *)
+;;
+
+(* The content model of an element type. *)
+type content_model_type =
+ Unspecified
+ (* A specification of the model has not yet been found *)
+ | Empty
+ (* Nothing is allowed as content *)
+ | Any
+ (* Everything is allowed as content *)
+ | Mixed of mixed_spec list
+ (* Elements and PCDATA in arbitrary order; the mixed_spec list says
+ * what is allowed in particular
+ *)
+ | Regexp of regexp_spec
+ (* The contents are elements following this regular expression *)
+
+and mixed_spec =
+ MPCDATA
+ (* PCDATA children are allowed *)
+ | MChild of string
+ (* This kind of element is allowed *)
+
+and regexp_spec =
+ Optional of regexp_spec
+ (* subexpression? *)
+ | Repeated of regexp_spec
+ (* subexpression* *)
+ | Repeated1 of regexp_spec
+ (* subexpression+ *)
+ | Alt of regexp_spec list
+ (* subexpr1 | subexpr2 | ... | subexprN *)
+ | Seq of regexp_spec list
+ (* subexpr1 , subexpr2 , ... , subexprN *)
+ | Child of string
+ (* This kind of element is allowed here *)
+;;
+
+
+(* The declared type of an attribute; every constructor corresponds to
+ * the attribute type of the same name in a DTD.
+ *)
+type att_type =
+ A_cdata
+ | A_id
+ | A_idref
+ | A_idrefs
+ | A_entity
+ | A_entities
+ | A_nmtoken
+ | A_nmtokens
+ | A_notation of string list
+ (* NOTATION (name1 | name2 | ... | nameN) *)
+ | A_enum of string list
+ (* (name1 | name2 | ... | nameN) *)
+;;
+
+
+(* The default declaration of an attribute. *)
+type att_default =
+ D_required
+ (* #REQUIRED *)
+ | D_implied
+ (* #IMPLIED *)
+ | D_default of string (* The default value is already expanded *)
+ | D_fixed of string (* The default value is already expanded *)
+;;
+
+
+(* The value of an attribute. *)
+type att_value =
+ Value of string
+ (* a single value *)
+ | Valuelist of string list
+ (* a list of values *)
+ | Implied_value
+ (* a value left out *)
+;;
+
+
+(* The type of warner objects: the method 'warn' is invoked with a string.
+ * NOTE(review): presumably the string is the text of the warning; the
+ * callers are not visible here.
+ *)
+class type collect_warnings =
+ object
+ method warn : string -> unit
+ end
+;;
+
+
+(* A warner that silently discards every string passed to 'warn'. *)
+class drop_warnings =
+  object
+    method warn (_ : string) : unit = ()
+  end
+;;
+
+
+(* The encodings are those defined in Netconversion (package netstring). *)
+type encoding = Netconversion.encoding;;
+
+type rep_encoding =
+ (* The subset of 'encoding' that may be used for internal representation
+ * of strings. Both encodings are ASCII-compatible.
+ *)
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ ]
+;;
+
+
+exception Validation_error of string
+ (* Violation of a validity constraint *)
+
+exception WF_error of string
+ (* Violation of a well-formedness constraint *)
+
+exception Error of string
+ (* Other error *)
+
+exception Character_not_supported
+ (* Reported by string_of_exn as a restriction: "Character not supported" *)
+
+exception At of (string * exn)
+ (* The string is a description where the exn happened. The exn value can
+ * again be At(_,_).
+ *)
+
+exception Undeclared
+ (* Reported by string_of_exn as an information, not as an error *)
+
+
+(* Converts an exception into a readable message. For At(where,cause) the
+ * description 'where' is put in front of the message for 'cause'.
+ * Exceptions that are not listed here are printed with Printexc.to_string.
+ *)
+let rec string_of_exn = function
+  | At (where, cause) ->
+      where ^ string_of_exn cause
+  | Validation_error msg ->
+      "ERROR (Validity constraint): " ^ msg
+  | WF_error msg ->
+      "ERROR (Well-formedness constraint): " ^ msg
+  | Error msg ->
+      "ERROR: " ^ msg
+  | Character_not_supported ->
+      "RESTRICTION: Character not supported"
+  | Netconversion.Malformed_code ->
+      "ERROR: Bad character stream"
+  | Undeclared ->
+      "INFORMATION: Undeclared"
+  | Parsing.Parse_error ->
+      "SYNTAX ERROR"
+  | other ->
+      "Other exception: " ^ Printexc.to_string other
+;;
+
+
+(* The possible destinations of the function 'write'. *)
+type output_stream =
+ Out_buffer of Buffer.t
+ (* The substring is appended to the buffer *)
+ | Out_channel of out_channel
+ (* The substring is output to the channel *)
+ | Out_function of (string -> int -> int -> unit)
+ (* The function is called with the string, the position, and the length *)
+;;
+
+
+(* write os str pos len: Writes the substring of 'str' that begins at
+ * 'pos' and has length 'len' to the buffer, channel, or function 'os'.
+ *)
+let write os str pos len =
+  (* Select the writer for the destination first, then apply it once *)
+  let emit =
+    match os with
+    | Out_buffer buf -> Buffer.add_substring buf
+    | Out_channel chan -> output chan
+    | Out_function fn -> fn
+  in
+  emit str pos len
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.6 2000/07/27 00:41:15 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.5 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.4 2000/07/14 21:25:27 gerd
+ * Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3 2000/07/08 16:23:50 gerd
+ * Added the exception 'Error'.
+ *
+ * Revision 1.2 2000/07/04 22:14:05 gerd
+ * Implemented the changes of rev. 1.2 of pxp_types.mli.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_types.ml:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:19 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:33 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:14:58 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright 1999 by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+type ext_id =
+ System of string
+ | Public of (string * string)
+ | Anonymous
+
+ (* External identifiers are either "system identifiers" (filenames or URLs),
+ * or "public identifiers" Public(id,sysid) where "id" is the representation
+ * of the public ID, and "sysid" a fallback system ID, or the empty string.
+ *
+ * New in PXP: Sometimes the external ID is not known. This case can be
+ * referred to as Anonymous ID.
+ *
+ * Encoding: The identifiers are _always_ encoded as UTF8 strings,
+ * regardless of whether another encoding is configured for the parser.
+ * TODO: implement this
+ *)
+
+
+(* Describes where a DTD comes from. *)
+type dtd_id =
+ External of ext_id (* DTD is completely external *)
+ | Derived of ext_id (* DTD is derived from an external DTD *)
+ | Internal (* DTD is completely internal *)
+;;
+
+(* The content model of an element type. *)
+type content_model_type =
+ Unspecified (* A specification of the model has not yet been
+ * found
+ *)
+ | Empty (* Nothing is allowed as content *)
+ | Any (* Everything is allowed as content *)
+ | Mixed of mixed_spec list (* The contents consist of elements and PCDATA
+ * in arbitrary order. What is allowed in
+ * particular is given as mixed_spec.
+ *)
+ | Regexp of regexp_spec (* The contents are elements following this regular
+ * expression
+ *)
+
+and mixed_spec =
+ MPCDATA (* PCDATA children are allowed *)
+ | MChild of string (* This kind of element is allowed *)
+
+and regexp_spec =
+ Optional of regexp_spec (* subexpression? *)
+ | Repeated of regexp_spec (* subexpression* *)
+ | Repeated1 of regexp_spec (* subexpression+ *)
+ | Alt of regexp_spec list (* subexpr1 | subexpr2 | ... | subexprN *)
+ | Seq of regexp_spec list (* subexpr1 , subexpr2 , ... , subexprN *)
+ | Child of string (* This kind of element is allowed here *)
+;;
+
+
+(* The declared type of an attribute. *)
+type att_type =
+ A_cdata (* CDATA *)
+ | A_id (* ID *)
+ | A_idref (* IDREF *)
+ | A_idrefs (* IDREFS *)
+ | A_entity (* ENTITY *)
+ | A_entities (* ENTITIES *)
+ | A_nmtoken (* NMTOKEN *)
+ | A_nmtokens (* NMTOKENS *)
+ | A_notation of string list (* NOTATION (name1 | name2 | ... | nameN) *)
+ | A_enum of string list (* (name1 | name2 | ... | nameN) *)
+;;
+
+
+(* The default declaration of an attribute. *)
+type att_default =
+ D_required (* #REQUIRED *)
+ | D_implied (* #IMPLIED *)
+ | D_default of string (* <value> -- The value is already expanded *)
+ | D_fixed of string (* #FIXED <value> -- The value is already expanded *)
+;;
+
+
+(* The value of an attribute. *)
+type att_value =
+ Value of string (* a single value *)
+ | Valuelist of string list (* a list of values *)
+ | Implied_value (* a value left out *)
+;;
+
+
+(* The type of warner objects: the method 'warn' is invoked with a string.
+ * NOTE(review): presumably the string is the text of the warning; the
+ * callers are not visible here.
+ *)
+class type collect_warnings =
+ object
+ method warn : string -> unit
+ end
+;;
+
+
+class drop_warnings : collect_warnings;;
+ (* A warner whose method 'warn' does nothing, i.e. the passed strings
+ * are dropped.
+ *)
+
+
+type encoding = Netconversion.encoding;;
+ (* We accept all encodings for character sets which are defined in
+ * the module Netconversion (package netstring).
+ *)
+
+type rep_encoding =
+ (* The subset of 'encoding' that may be used for internal representation
+ * of strings.
+ * Note: The following encodings are ASCII-compatible! This is an important
+ * property used throughout the whole PXP code.
+ *)
+ [ `Enc_utf8 (* UTF-8 *)
+ | `Enc_iso88591 (* ISO-8859-1 *)
+ ]
+;;
+
+
+exception Validation_error of string
+ (* Violation of a validity constraint *)
+
+exception WF_error of string
+ (* Violation of a well-formedness constraint *)
+
+exception Error of string
+ (* Other error *)
+
+exception Character_not_supported
+ (* Reported by string_of_exn as a restriction: "Character not supported" *)
+
+exception At of (string * exn)
+ (* The string is a description where the exn happened. The exn value can
+ * again be At(_,_) (for example, when an entity within an entity causes
+ * the error).
+ *)
+
+exception Undeclared
+ (* Indicates that no declaration is available and because of this every
+ * kind of usage is allowed.
+ *)
+
+val string_of_exn : exn -> string
+ (* Converts a PXP exception into a readable string. For At(s,x) the
+ * description s is put in front of the string for x. Exceptions that
+ * are not known to this function are converted by Printexc.to_string.
+ *)
+
+
+(* The possible destinations of the function 'write': a buffer, a channel,
+ * or a function that is called with the string, the position, and the
+ * length.
+ *)
+type output_stream =
+ Out_buffer of Buffer.t
+ | Out_channel of out_channel
+ | Out_function of (string -> int -> int -> unit)
+
+val write : output_stream -> string -> int -> int -> unit
+ (* write os s pos len: Writes the substring of s that begins at position
+ * pos and has length len to the buffer/channel/function os.
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.7 2000/07/27 00:41:15 gerd
+ * new 8 bit codes
+ *
+ * Revision 1.6 2000/07/16 18:31:09 gerd
+ * The exception Illegal_character has been dropped.
+ *
+ * Revision 1.5 2000/07/16 16:34:21 gerd
+ * Updated comments.
+ *
+ * Revision 1.4 2000/07/14 21:25:27 gerd
+ * Simplified the type 'collect_warnings'.
+ *
+ * Revision 1.3 2000/07/08 16:23:50 gerd
+ * Added the exception 'Error'.
+ *
+ * Revision 1.2 2000/07/04 22:08:26 gerd
+ * type ext_id: New variant Anonymous. - The System and Public
+ * variants are now encoded as UTF-8.
+ * collect_warnings is now a class type only. New class
+ * drop_warnings.
+ * New functions encoding_of_string and string_of_encoding.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from Markup_types.mli:
+ *
+ * Revision 1.7 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.6 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.5 2000/05/01 20:43:25 gerd
+ * New type output_stream; new function 'write'.
+ *
+ * Revision 1.4 1999/09/01 16:25:35 gerd
+ * Dropped Illegal_token and Content_not_allowed_here. WF_error can
+ * be used instead.
+ *
+ * Revision 1.3 1999/08/15 02:22:40 gerd
+ * Added exception Undeclared.
+ *
+ * Revision 1.2 1999/08/14 22:15:17 gerd
+ * New class "collect_warnings".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types;;
+open Pxp_lexer_types;;
+
+(* Initialization: passes the record of the UTF-8 scanner functions to
+ * Pxp_lexers.init_utf8. This is executed when the module is linked into
+ * the program.
+ *)
+let utf8_lexer_set =
+  { lex_encoding = `Enc_utf8;
+    scan_document = Pxp_lex_document_utf8.scan_document;
+    scan_content = Pxp_lex_content_utf8.scan_content;
+    scan_within_tag = Pxp_lex_within_tag_utf8.scan_within_tag;
+    scan_document_type = Pxp_lex_document_type_utf8.scan_document_type;
+    scan_declaration = Pxp_lex_declaration_utf8.scan_declaration;
+    scan_content_comment = Pxp_lex_misc_utf8.scan_content_comment;
+    scan_decl_comment = Pxp_lex_misc_utf8.scan_decl_comment;
+    scan_document_comment = Pxp_lex_misc_utf8.scan_document_comment;
+    scan_ignored_section = Pxp_lex_name_string_utf8.scan_ignored_section;
+    scan_xml_pi = Pxp_lex_misc_utf8.scan_xml_pi;
+    scan_dtd_string = Pxp_lex_dtd_string_utf8.scan_dtd_string;
+    scan_content_string = Pxp_lex_content_string_utf8.scan_content_string;
+    scan_name_string = Pxp_lex_name_string_utf8.scan_name_string;
+    scan_only_xml_decl = Pxp_lex_misc_utf8.scan_only_xml_decl;
+    scan_for_crlf = Pxp_lex_misc_utf8.scan_for_crlf;
+  }
+in
+Pxp_lexers.init_utf8 utf8_lexer_set
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.3 2000/06/04 20:31:44 gerd
+ * Updated.
+ *
+ * Revision 1.2 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.1 2000/05/23 00:08:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+(* This is a module without interface. Its initialization part sets up
+ * the UTF-8 lexers.
+ * Link with this module if you want to use the UTF-8 lexers!
+ *)
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.1 2000/05/23 00:08:48 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$ -*- tuareg -*-
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+open Parsing
+open Pxp_types
+open Pxp_lexer_types
+open Pxp_dtd
+open Pxp_entity
+open Pxp_document
+open Pxp_aux
+
+(* Some types from the interface definition: *)
+
+(* Exception belonging to the ID index interface.
+ * NOTE(review): presumably raised when the same ID is added to an index
+ * twice; nothing in this part of the file raises it -- confirm against
+ * the interface definition.
+ *)
+exception ID_not_unique
+
+(* Class type of an index mapping strings to nodes. The parser object
+ * takes an optional value of this type as its 'id_index' argument.
+ *)
+class type [ 'ext ] index =
+object
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ method find : string -> 'ext node
+end
+
+
+(* Parser configuration. Only the fields whose use is visible in this part
+ * of the file are commented.
+ *)
+type config =
+ { warner : collect_warnings;
+ (* passed on to newly created documents and external entities *)
+ errors_with_line_numbers : bool;
+ (* passed on to newly created external entities *)
+ enable_pinstr_nodes : bool;
+ enable_super_root_node : bool;
+ (* if true, the parser object pushes a real super root node on its
+ * element stack; otherwise a placeholder node is used
+ *)
+ enable_comment_nodes : bool;
+ encoding : rep_encoding;
+ (* selects the lexer set; must be equal to the encoding of the DTD,
+ * otherwise the parser object fails with "Encoding mismatch"
+ *)
+ recognize_standalone_declaration : bool;
+ (* whether the standalone declaration found in the XML declaration
+ * is stored into the DTD
+ *)
+ store_element_positions : bool;
+ idref_pass : bool;
+ validate_by_dfa : bool;
+ accept_only_deterministic_models : bool;
+ debugging_mode : bool;
+ (* passed to set_debugging_mode of newly created entities *)
+ }
+
+(* Where the parser gets its input from: either a function creating the
+ * entity for a given DTD together with a resolver, or an external ID
+ * together with the resolver that opens it.
+ *)
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+
+(* The possible entry points of the grammar; a value of this type is the
+ * second argument of the 'parse' method of the parser object.
+ *)
+type start_symbol =
+ Ext_document
+ | Ext_declarations
+ | Ext_element
+
+
+(* The token source as seen by the generated parser functions. All fields
+ * are mutable; 'current_token' is also overwritten directly by the
+ * special parser for ignored conditional sections.
+ *)
+type context =
+ { mutable current : unit -> token; (* get the current token *)
+ mutable get_next : unit -> token; (* go on to the next token; return it *)
+ mutable current_token : token; (* This is the current token *)
+ mutable manager : entity_manager; (* The entity manager *)
+ }
+
+
+let make_context entity_manager =
+  (* Creates the token context for a parser run reading from
+   * 'entity_manager'. The returned context is already primed: the first
+   * token has been fetched, so 'current' can be called immediately.
+   *)
+  let ctx =
+    { current = (fun _ -> assert false);    (* placeholder, replaced below *)
+      get_next = (fun _ -> assert false);   (* placeholder, replaced below *)
+      current_token = Eof;
+      manager = entity_manager;
+    }
+  in
+  (* The function stored in this reference can be exchanged as a
+   * side-effect when an entity is opened or closed. 'advance' therefore
+   * dereferences the cell on every call instead of capturing the function
+   * once, so the currently valid "get_next" function is always executed.
+   *)
+  let next_fn_cell = entity_manager # yy_get_next_ref in
+  let advance () =
+    let tok = (!next_fn_cell) () in
+    ctx.current_token <- tok;
+    tok
+  in
+  ctx.current <- (fun () -> ctx.current_token);
+  ctx.get_next <- advance;
+  (* Fetch the first token. *)
+  ignore (advance ());
+  ctx
+;;
+
+
+(* from_channel ?system_encoding ?id ?fixenc ch:
+ * Returns a source (always of the form ExtID) that reads the document from
+ * the already opened channel 'ch'.
+ * - system_encoding: passed through to the resolve_as_file resolver
+ * - id: the external ID the channel is associated with, if any; without
+ * it the source is Anonymous
+ * - fixenc: returned together with the channel from channel_of_url
+ * The channel can be handed out only once; see the comments below.
+ *)
+let from_channel ?system_encoding ?id:init_id ?fixenc ch =
+
+ (* Reading from a channel works by modifying the algorithm of
+ * resolve_as_file.
+ *)
+
+ let url_syntax = (* A syntax suitable for "file" URLs *)
+ { Neturl.null_url_syntax with
+ Neturl.url_enable_scheme = Neturl.Url_part_allowed;
+ Neturl.url_enable_host = Neturl.Url_part_allowed;
+ Neturl.url_enable_path = Neturl.Url_part_required;
+ Neturl.url_accepts_8bits = true;
+ }
+ in
+
+ (* The URL file:/// *)
+ let an_url =
+ Neturl.make_url
+ ~scheme: "file"
+ ~host: ""
+ ~path: [ "" ]
+ url_syntax
+ in
+
+ let init_channel_done = ref false in
+ (* Whether the first access to this source has already happened. *)
+
+ (* The task of url_of_id is:
+ * - When it is called the first time, and no init_id is present,
+ * the URL file:/// is passed back (an_url). This forces that
+ * absolute path names /path/dir/... will be interpreted as
+ * file path names. (But relative path names will not work.)
+ * - If an init_id has been passed, we can assume that the opened URL
+ * is exactly this init_id. By raising Not_competent it is indicated
+ * that the standard method is to be used for the interpretation of
+ * the URL.
+ * - Otherwise, the channel is already being read, and thus cannot be
+ * opened again. (This case is handled in channel_of_url.)
+ *)
+
+ let url_of_id xid =
+ if !init_channel_done then begin
+ (* Use the normal way of determining the URL of the ID: *)
+ raise Pxp_reader.Not_competent
+ end
+ else begin
+ match init_id with
+ None ->
+ an_url
+ (* If the channel is not associated with any URL: Simply pass
+ * the URL file:/// back.
+ *)
+ | Some the_init_id ->
+ assert (the_init_id = xid);
+ raise Pxp_reader.Not_competent
+ (* If the channel is associated with a URL, the corresponding
+ * ID must be passed when the first invocation happens.
+ *)
+ end
+ in
+
+ (* The task of channel_of_url:
+ * - If it is called the first time ("else"), the channel is returned
+ * (together with fixenc), and the source is marked as used.
+ * - Otherwise, the channel is already being read, and thus cannot be
+ * opened again. By raising Not_competent it is signaled that the
+ * resolve_as_file object must not continue to open the URL.
+ * The 'url' argument itself is not inspected.
+ *)
+
+ let channel_of_url url =
+ if !init_channel_done then
+ raise Pxp_reader.Not_competent
+ else begin
+ init_channel_done := true;
+ ch, fixenc
+ end
+ in
+
+ let r =
+ new Pxp_reader.resolve_as_file
+ ?system_encoding:system_encoding
+ ~url_of_id:url_of_id
+ ~channel_of_url:channel_of_url
+ ()
+ in
+
+ let init_xid =
+ match init_id with
+ None -> Anonymous
+ | Some id ->
+ (* Note: 'id' may be illegal (malformed); in this case, the first
+ * invocation of url_of_id will raise Not_competent, and the 'open_in'
+ * method will fail.
+ *)
+ id
+ in
+
+ ExtID(init_xid, r)
+;;
+
+
+let from_file ?system_encoding utf8_filename =
+  (* Returns a source (of the form ExtID) for the file 'utf8_filename'.
+   * The name is made absolute, converted to a "file" URL on host
+   * "localhost", and the string form of that URL is used as SYSTEM
+   * identifier. 'system_encoding' is passed through to the
+   * resolve_as_file resolver.
+   *)
+  let file_resolver =
+    new Pxp_reader.resolve_as_file
+      ?system_encoding:system_encoding
+      ()
+  in
+  (* A name counts as absolute if it begins with a slash; every other
+   * name (including the empty one) is taken relative to the current
+   * working directory.
+   *)
+  let is_absolute =
+    utf8_filename <> "" && utf8_filename.[0] = '/' in
+  let abs_path =
+    if is_absolute then
+      utf8_filename
+    else
+      Sys.getcwd() ^ "/" ^ utf8_filename
+  in
+  let file_url =
+    Neturl.make_url
+      ~scheme:"file"
+      ~host:"localhost"
+      ~path:(Neturl.split_path abs_path)
+      { Neturl.ip_url_syntax with Neturl.url_accepts_8bits = true }
+  in
+  ExtID(System (Neturl.string_of_url file_url), file_resolver)
+;;
+
+
+let from_string ?fixenc s =
+  (* Returns an anonymous source whose resolver reads the document text
+   * from the string 's'; 'fixenc' is passed through to the resolver.
+   *)
+  ExtID(Anonymous,
+        new Pxp_reader.resolve_read_this_string ?fixenc:fixenc s)
+;;
+
+
+(**********************************************************************)
+
+class ['ext] parser_object
+ init_doc init_dtd init_extend_dtd init_config init_resolver init_spec
+ init_process_xmldecl transform_dtd id_index
+ =
+ object (self)
+
+ (* Note that the 'ext parameter has been the motivation to make the
+ * parser a class.
+ *)
+
+ val mutable dtd = init_dtd
+ (* The DTD being parsed; or the DTD currently assumed *)
+
+ val extend_dtd = init_extend_dtd
+ (* Whether the DTD should be extended by ELEMENT, ATTLIST, and
+ * NOTATION declarations or not. (True for validating mode,
+ * false for well-formedness mode.)
+ *)
+
+ val transform_dtd = transform_dtd
+ (* A function transforming the DTD *)
+
+ val id_index = (id_index : 'ext index option)
+ (* The ID index or None *)
+
+ val process_xmldecl = init_process_xmldecl
+ (* Whether the XML declaration is parsed and the found XML version
+ * and standalone declaration are passed to 'doc'.
+ *)
+
+ val lexerset = Pxp_lexers.get_lexer_set (init_config.encoding)
+
+ val doc = init_doc
+ (* The current document *)
+
+ method doc = (doc : 'ext document)
+
+ val resolver = init_resolver
+ (* The resolver for external IDs *)
+
+ val config = init_config
+ (* The current configuration *)
+
+ val elstack = (Stack.create() : ('ext node * entity_id) Stack.t)
+ (* The element stack containing all open elements, i.e. elements that
+ * have begun by a start tag but that have not been finished (end tag).
+ * If the parser sees a start tag, it creates the element and pushes it
+ * on top of this stack. If the parser recognizes an end tag, it pulls
+ * one element from the stack and checks if it has the same name as
+ * given with the end tag.
+ *
+ * At initialization time, a special element is pushed on the stack,
+ * the so-called super root. It is always the bottommost
+ * element of the stack, and serves as a guard.
+ * [See "initializer" below.]
+ *)
+
+ method current =
+   (* Returns the node on top of the element stack, i.e. the innermost
+    * element that is still open.
+    *)
+   let (top_node, _) =
+     try
+       Stack.top elstack
+     with
+       Stack.Empty -> assert false
+         (* Cannot happen: the super root pushed at initialization time
+          * always remains at the bottom of the stack.
+          *)
+   in
+   top_node
+
+ val mutable n_tags_open = 0
+ (* Number of begin tags that have been parsed and whose corresponding
+ * end tags have not yet been parsed
+ *)
+
+ val mutable p_internal_subset = false
+ (* true while parsing the internal subset - there are some additional
+ * constraints for internal subsets, and because of this it must
+ * be known whether the current declaration is contained in the
+ * internal or external subset of the DTD.
+ *)
+
+ val mutable root = None
+ (* Contains the root element (topmost element) while it is being parsed
+ * and after it has been parsed.
+ * This variable is None before the root element is seen.
+ *)
+
+ method root = root
+
+ val spec = init_spec
+ (* A hashtable that contains exemplar objects for the various element
+ * types. If an element is parsed, the exemplar is looked up and
+ * "cloned" (by the "create" method)
+ *)
+
+ val mutable current_data = []
+ (* Collects character data. *)
+
+ method collect_data s =
+ (* Collects the character material 's'. The string is prepended, so
+ * 'current_data' holds the pieces in reverse order of arrival
+ * (most recent piece first).
+ *)
+ current_data <- s :: current_data
+
+ method save_data =
+ (* Puts the material collected in 'current_data' into a new
+ * node, and appends this node as new sub node to 'current'.
+ * No node is created if the collected material is the empty string.
+ * In every case 'current_data' is empty afterwards.
+ *)
+ match current_data with
+ [] ->
+ ()
+ | [ str ] ->
+ (* Only one piece: no concatenation necessary *)
+ if str <> "" then
+ self # current # add_node (create_data_node spec dtd str);
+ current_data <- []
+ | _ ->
+ (* Several pieces: compute the total length, then copy the pieces
+ * into one string.
+ *)
+ let count = List.fold_left
+ (fun acc s -> acc + String.length s)
+ 0
+ current_data in
+ let str = String.create count in
+ let pos = ref count in
+ (* The list contains the most recent piece first, so the buffer is
+ * filled from its end towards its beginning.
+ *)
+ List.iter
+ (fun s ->
+ let l = String.length s in
+ pos := !pos - l;
+ String.blit
+ ~src:s
+ ~src_pos:0
+ ~dst:str
+ ~dst_pos:(!pos)
+ ~len:l
+ )
+ current_data;
+ assert(!pos = 0);
+ if str <> "" then
+ self # current # add_node (create_data_node spec dtd str);
+ current_data <- []
+
+
+ method only_whitespace data =
+   (* Checks that the string "data" contains only whitespace: scanning it
+    * with the name-string lexer must yield exactly one Ignore token
+    * followed by Eof. On failure, WF_error is raised.
+    *)
+   let lexbuf = Lexing.from_string data in
+   let expect_token expected =
+     let tok = lexerset.scan_name_string lexbuf in
+     if tok <> expected then
+       raise(WF_error("Data not allowed here"))
+   in
+   expect_token Ignore;
+   expect_token Eof
+
+ initializer
+ (* Runs when the parser object is created: checks the configuration
+ * against the DTD and prepares the element stack.
+ *)
+ (* CHECKS: *)
+ (* The configured encoding and the encoding of the DTD must agree;
+ * otherwise Failure "Encoding mismatch" is raised.
+ *)
+ if config.encoding <> dtd # encoding then
+ failwith("Encoding mismatch");
+
+ (* --- Initialize 'elstack': Push the super-root on the stack. *)
+ let super_root =
+ if config.enable_super_root_node then
+ create_super_root_node spec dtd
+ else
+ (* because spec may not contain an exemplar for the super root: *)
+ create_no_node spec dtd
+ in
+ (* Move the super root or the emulation to the stack: *)
+ (* The parser object itself, coerced to entity_id, is stored as the
+ * entity ID of this bottommost stack entry.
+ *)
+ Stack.push (super_root, (self :> entity_id)) elstack;
+
+
+
+ (********* Here the method "parse" begins. The grammar below is
+ * transformed to a local function of this method
+ *)
+
+ method parse context start_symbol =
+
+ let parse_ignored_section yy_current yy_get_next =
+ (* A special parser which should be used after <![IGNORE[.
+ * It parses until the corresponding ]]> is found.
+ *
+ * yy_current and yy_get_next are the token access functions (current
+ * token / advance to next token). Raises Parsing.Parse_error if the
+ * Conditional_body token is missing, or if the entity or the input
+ * ends before the section is closed.
+ *)
+
+ (* Skip Ignore tokens preceding the body of the section: *)
+ while yy_current() = Ignore do
+ ignore(yy_get_next());
+ done;
+
+ (* Now the Conditional_body token must be the current token: *)
+ ( match yy_current() with
+ Conditional_body _ -> ()
+ | _ -> raise Parsing.Parse_error;
+ );
+
+ let en = context.manager # current_entity in
+ (* llev is the nesting depth of conditional sections; the loop ends
+ * when the section that is being ignored has been closed.
+ *)
+ let llev = ref 1 in
+ while !llev >= 1 do
+ let igntok = en # next_ignored_token in
+ (* next_ignored_token: uses a special lexer that only
+ * recognizes Conditional_begin and Conditional_end;
+ * other character combinations are ignored.
+ *)
+ (* NOTE: next_ignored_token works much like yy_get_next,
+ * but it does not set the current token!
+ *)
+ match igntok with
+ Conditional_begin _ ->
+ llev := !llev + 1
+ | Conditional_end _ ->
+ llev := !llev - 1;
+ (* Because the loop may be exited now: *)
+ (* The token is stored by hand so that the caller finds the
+ * Conditional_end as current token.
+ *)
+ context.current_token <- igntok;
+ | (End_entity | Eof) ->
+ raise Parsing.Parse_error
+ | _ ->
+ ()
+ done;
+
+ in
+
+
+ let check_and_parse_xmldecl xmldecl =
+   (* Evaluates the XML declaration 'xmldecl' of the document, but only if
+    * 'process_xmldecl' is set: the version number is checked and stored
+    * in 'doc', and the standalone declaration is stored in 'dtd' if the
+    * configuration asks for it. A standalone value other than "yes" or
+    * "no" raises WF_error.
+    *)
+   if process_xmldecl then begin
+     let (version, _, standalone_opt) =
+       decode_doc_xml_pi (decode_xml_pi xmldecl) in
+     check_version_num version;
+     doc # init_xml_version version;
+     let is_standalone =
+       match standalone_opt with
+         Some "yes" -> true
+       | None | Some "no" -> false   (* a missing declaration counts as "no" *)
+       | Some _ -> raise (WF_error("Illegal 'standalone' declaration"))
+     in
+     if config.recognize_standalone_declaration then
+       dtd # set_standalone_declaration is_standalone
+   end
+ in
+
+ let recode_utf8 s =
+   (* Converts 's' from the configured internal encoding to UTF-8. *)
+   match config.encoding with
+     `Enc_utf8 ->
+       s   (* already UTF-8, nothing to do *)
+   | _ ->
+       Netconversion.recode_string
+         ~in_enc:(config.encoding :> encoding) ~out_enc:`Enc_utf8 s
+ in
+
+
+%%
+
+/* The following grammar looks similar to ocamlyacc grammars, but
+ * ocamlyacc is actually not used to transform the grammar into a parser.
+ * Instead, the parser generator m2parsergen is applied.
+ *
+ * The format of the grammar is different (see m2parsergen/README),
+ * but I hope that you can understand most features immediately.
+ *
+ * The type of the parser is different: m2parsergen creates a top-down
+ * parser while ocamlyacc generates a LALR-1 parser.
+ *
+ * The way the generated code is called is different: ocamlyacc produces
+ * lots of top-level definitions whereas m2parsergen generates only
+ * a local let-in-phrase. This is explained in the already mentioned
+ * README file.
+ */
+
+/* See Pxp_types.ml for comments to the various tokens */
+
+%token Begin_entity
+%token End_entity
+%token Comment_begin
+%token Comment_end
+%token Ignore
+%token Eq
+%token Rangle
+%token Rangle_empty
+%token <> Conditional_begin
+%token <> Conditional_body
+%token <> Conditional_end
+%token Percent
+%token Plus
+%token Star
+%token Bar
+%token Comma
+%token Qmark
+%token Pcdata
+%token Required
+%token Implied
+%token Fixed
+%token Eof
+
+%token <> Comment_material
+%token <> Doctype
+%token <> Doctype_rangle
+%token <> Dtd_begin
+%token <> Dtd_end
+%token <> Decl_element
+%token <> Decl_attlist
+%token <> Decl_entity
+%token <> Decl_notation
+%token <> Decl_rangle
+%token <> Lparen
+%token <> Rparen
+%token <> RparenPlus
+%token <> RparenStar
+%token <> RparenQmark
+
+%token <> Tag_beg
+%token <> Tag_end
+
+%token <> PI
+%token <> PI_xml
+%token <> Cdata
+%token <> CRef
+%token <> ERef
+%token <> PERef
+%token <> CharData
+%token <> LineEnd
+%token <> Name
+%token <> Nametoken
+%token <> Attval
+%token <> Attval_nl_normalized
+%token <> Unparsed_string
+
+/* START SYMBOLS:
+ *
+ * "ext_document": parses a complete XML document (i.e. an optional
+ * <!DOCTYPE..> followed by an element)
+ * "ext_declarations": parses an "external DTD subset", i.e. a sequence
+ * of declarations
+ * "ext_element": parses a single element (no <!DOCTYPE...> allowed);
+ * the element need not be the root element of the
+ * DTD
+ *
+ * The functions corresponding to these symbols always return () because
+ * they only have side-effects.
+ */
+
+/* SOME GENERAL COMMENTS:
+ *
+ * The parser does not get its tokens from the lexers directly. Instead of
+ * this, there is an entity object between the parser and the lexers. This
+ * object already handles:
+ *
+ * - References to general and parameter entities. The token stream is
+ * modified such that tokens automatically come from the referenced entities.
+ * External parameter entities and all general entities are embraced by
+ * the two special tokens Begin_entity and End_entity. The parser must
+ * check that these braces are correctly nested.
+ */
+
+%%
+
+
+ext_document():
+ Begin_entity
+ doc_xmldecl_then_misc_then_prolog_then_rest() End_entity
+ {{
+ if n_tags_open <> 0 then
+ raise(WF_error("Missing end tag"))
+ }}
+
+
+/* In the following rule, we must find out whether there is an XML declaration
+ * or not, and directly after that either "process_xmldecl" or
+ * "process_missing_xmldecl" of the current entity must be called.
+ * AND IT MUST BE DIRECTLY! Because of this, the invocation is carried out
+ * in the "$" clause immediately following the first token.
+ *
+ * TODO: This is not enough. The first token may be a tag, and the tag
+ * may already contain non-ASCII characters. (But in this case, the resolvers
+ * assume UTF8, and they are right...)
+ */
+
+doc_xmldecl_then_misc_then_prolog_then_rest():
+ pl:PI_xml
+ $ {{ context.manager # current_entity # process_xmldecl pl;
+ check_and_parse_xmldecl pl;
+ }}
+ misc()* doc_prolog_then_rest()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ misc() misc()* doc_prolog_then_rest()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ doctypedecl() misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ contents_start()
+ {{ () }}
+
+
+doc_prolog_then_rest():
+ doctypedecl() misc()* contents_start()
+ {{ () }}
+| contents_start()
+ {{ () }}
+
+
+ext_element():
+ Begin_entity el_xmldecl_then_misc_then_rest() End_entity
+ {{
+ if n_tags_open <> 0 then
+ raise(WF_error("Missing end tag"))
+ }}
+
+
+/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
+
+el_xmldecl_then_misc_then_rest():
+ pl:PI_xml
+ $ {{ context.manager # current_entity # process_xmldecl pl; }}
+ misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ misc() misc()* contents_start()
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ contents_start()
+ {{ () }}
+
+
+ext_declarations():
+ /* Parses a sequence of declarations given by an entity. As side-effect,
+ * the parsed declarations are put into the dtd object.
+ */
+ Begin_entity decl_xmldecl_then_rest()
+ {{ () }}
+| Eof
+ {{ () }}
+
+
+decl_xmldecl_then_rest():
+ /* Note: This rule is also called from declaration()! */
+ pl:PI_xml
+ $ {{ context.manager # current_entity # process_xmldecl pl;
+ }}
+ declaration()* End_entity
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ declaration() declaration()* End_entity
+ {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+ End_entity
+ {{ () }}
+
+
+misc():
+ pi()
+ {{ () }}
+| data: CharData
+ /* In this context, the lexers sometimes do not recognize white space;
+ * instead CharData tokens containing white space are delivered.
+ */
+ {{ self # only_whitespace data }}
+| Ignore
+ {{ () }}
+| comment()
+ {{ () }}
+
+
+/********************* DOCUMENT TYPE DECLARATION *************************/
+
+doctypedecl():
+ /* parses from <!DOCTYPE to >. As side-effect, first the declarations of
+ * the internal DTD (if any) are put into the dtd object, then the
+ * declarations of the external DTD (if any) are put into the same object.
+ */
+ doctype_entid: Doctype
+ ws: Ignore Ignore*
+ doctypedecl_material (doctype_entid)
+ {{ () }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after `DOCTYPE'"))
+ | _ -> raise(WF_error("Bad DOCTYPE declaration"))
+ }}
+
+
+/* TRICK:
+ * ws: Ignore? Ignore*
+ * is meant seriously. The effect is that ws becomes a boolean variable
+ * which is true if there is an Ignore token and false otherwise.
+ * This construct is faster than just
+ * ws: Ignore*
+ * in which case ws becomes an integer variable containing the number of
+ * Ignore tokens. Counting the number of tokens is slower than only checking
+ * the existence.
+ *
+ * We need the information whether there is an Ignore token (representing
+ * white space), because white space is only obligatory if also an identifier
+ * for the external subset is parsed; this conditional syntax constraint is
+ * simply programmed in the body of the grammar rule.
+ */
+
+doctypedecl_material(doctype_entid):
+ /* Parses the DOCTYPE declaration from the root element name up to the
+ * closing `>': name, optional external ID, optional internal subset.
+ * doctype_entid is the value of the Doctype token; it must be physically
+ * identical to the value of the Doctype_rangle token.
+ * As side-effect, the root name and the ID of the DTD are set, the
+ * external subset (if any) is parsed into the dtd object, and finally
+ * the dtd is validated.
+ */
+ root_name: Name
+ ws: Ignore? Ignore*
+ external_subset: external_id()?
+ Ignore*
+ internal_subset: internal_dtd()?
+ Ignore*
+ doctype_rangle_entid: Doctype_rangle
+ {{
+ if doctype_entid != doctype_rangle_entid then
+ raise (Validation_error("Entities not properly nested with DOCTYPE declaration"));
+ dtd # set_root root_name;
+ begin match external_subset, internal_subset with
+ None, None -> () (* no DTD means no ID *)
+ | None, Some _ -> dtd # set_id Internal
+ | Some id, None -> dtd # set_id (External id)
+ | Some id, Some _ -> dtd # set_id (Derived id)
+ end;
+ (* Get now the external doctype declaration. Note that the internal
+ * subset has precedence and must be read first.
+ *)
+ begin match external_subset with
+ None -> ()
+ | Some id ->
+ (* White space between the root name and the external ID is
+ * obligatory; this is checked here and not by the grammar.
+ *)
+ if not ws then
+ raise(WF_error("Whitespace is missing after `DOCTYPE " ^
+ root_name ^ "'"));
+ (* The external subset is parsed by a second parser object that
+ * works on a clone of the resolver but on the same dtd object.
+ *)
+ let r' = resolver # clone in
+ let pobj =
+ new parser_object
+ (new document config.warner)
+ dtd
+ extend_dtd
+ config
+ r'
+ spec
+ process_xmldecl
+ (fun x -> x)
+ None
+ in
+ let en = new external_entity r' dtd "[dtd]"
+ config.warner id false config.errors_with_line_numbers
+ config.encoding
+ in
+ en # set_debugging_mode (config.debugging_mode);
+ let mgr = new entity_manager en in
+ en # open_entity true Declaration;
+ try
+ let context = make_context mgr in
+ pobj # parse context Ext_declarations;
+ ignore(en # close_entity);
+ with
+ error ->
+ (* Clean up, then re-raise the error wrapped into At together
+ * with the position string of the entity manager.
+ *)
+ ignore(en # close_entity);
+ r' # close_all;
+ let pos = mgr # position_string in
+ raise (At(pos, error))
+ end;
+ dtd # validate
+ }}
+ ? {{
+ match !yy_position with
+ "doctype_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad DOCTYPE declaration"))
+ }}
+
+/* Note that there are no keywords for SYSTEM or PUBLIC, as these would
+ * be difficult to recognize in the lexical contexts. Because of this,
+ * SYSTEM/PUBLIC is parsed as name, and the rule for everything after
+ * SYSTEM/PUBLIC is computed dynamically.
+ */
+
+external_id():
+ tok:Name
+ $ {{
+ let followup =
+ match tok with
+ "SYSTEM" -> parse_system_id
+ (* Apply the rule system_id (below) to parse the
+ * rest of the ID
+ *)
+ | "PUBLIC" -> parse_public_id
+ (* Apply the rule public_id (below) to parse the
+ * rest of the ID
+ *)
+ | _ -> raise(WF_error("SYSTEM or PUBLIC expected"))
+ in
+ }}
+ ws:Ignore Ignore*
+ r:[followup]()
+ {{ r }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after " ^ tok))
+ | _ -> raise(WF_error("Bad SYSTEM or PUBLIC identifier"))
+ }}
+
+
+system_id():
+ str:Unparsed_string
+ {{ System (recode_utf8 str) }}
+
+
+public_id():
+ str1: Unparsed_string
+ ws: Ignore Ignore*
+ str2: Unparsed_string
+ {{ check_public_id str1;
+ Public(recode_utf8 str1, recode_utf8 str2)
+ }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing between the literals of the PUBLIC identifier"))
+ | _ -> raise(WF_error("Bad PUBLIC identifier"))
+ }}
+
+
+/* The internal subset: "[" declaration* "]". While parsing the declarations
+ * the object variable p_internal_subset must be true; however, if there
+ * are entity references, this variable must be reset to false during
+ * the entity. (See the rule for "declaration" below.)
+ */
+
+internal_dtd():
+ dtd_begin_entid: internal_dtd_begin()
+ declaration()*
+ dtd_end_entid: internal_dtd_end()
+ {{
+ if dtd_begin_entid != dtd_end_entid then
+ raise(Validation_error("Entities not properly nested with internal DTD subset"))
+ }}
+ ? {{ match !yy_position with
+ "dtd_end_entid" -> raise(WF_error("`]' expected"))
+ | _ -> raise(WF_error("Bad internal DTD subset"))
+ }}
+
+
+internal_dtd_begin():
+ Dtd_begin
+ {{ assert (not p_internal_subset);
+ p_internal_subset <- true }}
+
+
+internal_dtd_end():
+ Dtd_end
+ {{ assert p_internal_subset;
+ p_internal_subset <- false }}
+
+
+declaration():
+ /* Parses a single declaration (or processing instruction). As side-effect
+ * the parsed declaration is stored into the dtd object.
+ *
+ * Alternatives: element, attribute list, entity, and notation
+ * declarations; processing instructions (added to the dtd); white space
+ * and comments (skipped); a nested entity (Begin_entity ... End_entity,
+ * parsed by decl_xmldecl_then_rest); and conditional sections
+ * (<![ ... ]]>), which are rejected with WF_error inside the internal
+ * subset.
+ */
+ elementdecl()
+ {{ () }}
+| attlistdecl()
+ {{ () }}
+| entid:Decl_entity ws:Ignore Ignore* e:entitydecl(entid)
+ {{ () }}
+ ? {{ match !yy_position with
+ "ws" -> raise(WF_error("Whitespace is missing after ENTITY"))
+ | "e" -> raise(WF_error("Name or `%' expected"))
+ | _ -> raise(WF_error("Bad entity declaration"))
+ }}
+| notationdecl()
+ {{ () }}
+| pi: PI
+ {{ let target, value = pi in
+ let pi = new proc_instruction target value config.encoding in
+ dtd # add_pinstr pi
+ }}
+| Ignore
+ {{ () }}
+| Comment_begin Comment_material* ce:Comment_end
+ {{ () }}
+ ? {{ match !yy_position with
+ "ce" -> raise(WF_error("`-->' expected"))
+ | _ -> raise(WF_error("Bad comment"))
+ }}
+| Begin_entity
+ $ {{ (* Set 'p_internal_subset' to 'false' until the matching 'end_entity'
+ * rule is parsed. This allows unrestricted usage of parameter entities
+ * within declarations of internal entities.
+ *)
+ let old_p_internal_subset = p_internal_subset in
+ p_internal_subset <- false;
+ }}
+ decl_xmldecl_then_rest()
+ {{ (* Restore the old value of 'p_internal_subset'. *)
+ p_internal_subset <- old_p_internal_subset;
+ ()
+ }}
+| begin_entid:Conditional_begin
+ $ {{ (* Check whether conditional sections are allowed at this position. *)
+ if p_internal_subset then
+ raise(WF_error("Restriction of the internal subset: Conditional sections not allowed"));
+ }}
+ Ignore*
+ cond:conditional_section() end_entid:Conditional_end
+ {{ (* Check whether Conditional_begin and Conditional_end are in the same
+ * entity. (This restriction is explained in the file SPECS.)
+ *)
+ if begin_entid != end_entid then
+ raise(Validation_error("The first and the last token of conditional sections must be in the same entity (additional restriction of this parser)"));
+ }}
+ ? {{ match !yy_position with
+ (* A conditional section is terminated by "]]>" *)
+ "end_entid" -> raise(WF_error("`]]>' expected"))
+ | "cond" -> raise(WF_error("INCLUDE or IGNORE expected"))
+ | _ -> raise(WF_error("Bad conditional section"))
+ }}
+
+/* The tokens INCLUDE/IGNORE are scanned as names, and the selection of the
+ * right parsing rule is dynamic.
+ * Note that parse_ignored_section is not defined by a grammar rule but
+ * by a conventional let-binding above.
+ */
+
+conditional_section():
+ include_or_ignore:Name
+ $ {{ let parsing_function =
+ match include_or_ignore with
+ "INCLUDE" -> parse_included_section
+ (* invoke rule "included_section" below *)
+ | "IGNORE" -> parse_ignored_section
+ (* invoke function "parse_ignored_section" *)
+ | _ -> raise(WF_error("INCLUDE or IGNORE expected"))
+ in
+ }}
+ [ parsing_function ] ()
+ {{ () }}
+ ? {{ raise(WF_error("Bad conditional section")) }}
+
+included_section():
+ Conditional_body declaration()*
+ {{ () }}
+| Ignore Ignore* Conditional_body declaration()*
+ {{ () }}
+
+
+/*************************** ELEMENT DECLARATIONS ********************/
+
+elementdecl():
+ /* parses <!ELEMENT ... >. Puts the parsed element type as side-effect into
+ * dtd.
+ */
+ decl_element_entid: Decl_element
+ $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+ }}
+ ws1: Ignore Ignore*
+ name: Name
+ ws2: Ignore Ignore*
+ content_model: contentspec()
+ Ignore*
+ decl_rangle_entid: Decl_rangle
+ {{
+ if decl_element_entid != decl_rangle_entid then
+ raise (Validation_error "Entities not properly nested with ELEMENT declaration");
+ if extend_dtd then begin
+ let el = new dtd_element dtd name in
+ (* It is allowed that an <!ATTLIST...> precedes the corresponding
+ * <!ELEMENT...>. Because of this it is possible that there is already
+ * an element called 'name' in the DTD, and we only must set the content
+ * model of this element.
+ *)
+ try
+ dtd # add_element el;
+ el # set_cm_and_extdecl content_model extdecl;
+ with
+ Not_found -> (* means: there is already an element 'name' *)
+ let el' = dtd # element name in
+ el' # set_cm_and_extdecl content_model extdecl;
+ (* raises Validation_error if el' already has a content model *)
+ end
+ }}
+ ? {{ match !yy_position with
+ ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+ | "name" -> raise(WF_error("The name of the element is expected here"))
+ | "content_model" -> raise(WF_error("Content model expression expected"))
+ | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+ | _ -> raise(WF_error("Bad element type declaration"))
+ }}
+
+contentspec():
+ /* parses a content model and returns it (type content_model_type) */
+ name: Name /* EMPTY or ANY */
+ {{ match name with
+ "EMPTY" -> Empty
+ | "ANY" -> Any
+ | _ -> raise(WF_error("EMPTY, ANY, or a subexpression expected"))
+ }}
+| entid:Lparen Ignore* term:mixed_or_regexp(entid)
+ {{ term }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/* Many of the following rules have an lparen_entid argument. This is the
+ * internal ID of the entity containing the corresponding left parenthesis;
+ * by comparing it with the ID of the entity of the right parenthesis the
+ * constraint is implemented that both parentheses must be in the same entity.
+ */
+
+mixed_or_regexp(lparen_entid):
+ re: choice_or_seq(lparen_entid)
+ {{ Regexp re }}
+| m: mixed(lparen_entid)
+ {{ m }}
+
+
+multiplier():
+ /* returns one of the multiplier symbols (?,*,+) */
+ Plus
+ {{ Plus }}
+| Star
+ {{ Star }}
+| Qmark
+ {{ Qmark }}
+
+
+mixed (lparen_entid) :
+ Pcdata
+ Ignore*
+ material: mixed_alternatives_top()
+ {{
+ let rest, rparen_entid = material in
+ if lparen_entid != rparen_entid then
+ raise (Validation_error "Entities not properly nested with parentheses");
+ Mixed (MPCDATA :: rest)
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+mixed_alternatives_top():
+ entid: Rparen
+ {{ [], entid }}
+| entid: RparenStar
+ {{ [], entid }}
+| Bar Ignore* name:Name Ignore* names:mixed_alternative()* entid:RparenStar
+ {{
+ (MChild name :: names), entid
+ }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | "entid" -> raise(WF_error("`)*' expected"))
+ | _ -> raise(WF_error("Bad content model expression"))
+ }}
+
+
+mixed_alternative() :
+ Bar Ignore* name:Name Ignore*
+ {{ MChild name }}
+ ? {{ match !yy_position with
+ "name" -> raise(WF_error("Name expected"))
+ | _ -> raise(WF_error("Bad content model expression"))
+ }}
+
+
+
+choice_or_seq (lparen_entid):
+ /* Parses the inner part of a parenthesized regular expression, after the
+ * left parenthesis: the first subexpression, then the remaining
+ * subexpressions up to and including the right parenthesis and an
+ * optional multiplier (the latter part is done by choice_or_seq_factor).
+ * lparen_entid is the entity ID of the left parenthesis; the right
+ * parenthesis must have the physically identical ID.
+ * Returns the regular expression itself; the caller mixed_or_regexp wraps
+ * it into Regexp, while cp uses it directly as subexpression.
+ * Whether the subexpressions form a choice ('|') or a sequence (',') is
+ * recognized by choice_or_seq_factor; all separators must be of the same
+ * kind.
+ */
+ re: cp()
+ Ignore*
+ factor: choice_or_seq_factor()
+ {{
+ let (finalmark,subexpr), rparen_entid = factor in
+ if lparen_entid != rparen_entid then
+ raise (Validation_error "Entities not properly nested with parentheses");
+ (* Check that the other subexpressions are "regexp", too, and
+ * merge them with the first.
+ *)
+ let re' =
+ match subexpr with
+ Alt [] -> re
+ (* only ")" followed: the expression has a single member *)
+ | Alt alt -> Alt (re :: alt)
+ | Seq seq -> Seq (re :: seq)
+ | _ -> assert false
+ in
+ (* Interpret the finalmark. *)
+ (* The token Ignore is used as finalmark when no multiplier follows
+ * the right parenthesis.
+ *)
+ match finalmark with
+ Ignore -> re'
+ | Plus -> Repeated1 re'
+ | Star -> Repeated re'
+ | Qmark -> Optional re'
+ | _ -> assert false
+ }}
+ ? {{ raise(WF_error("Bad content model expression")) }}
+
+choice_or_seq_factor():
+  /* Parses "|<subexpr>|...)" or ",<subexpr>,...)", both forms optionally
+   * followed by ?, *, or +.
+   * Returns ((finalmark, expr), rparen_entid), where
+   * - finalmark is the character after the right parenthesis or Ignore
+   * - expr is either
+   *   Alt [] meaning that only ")" has been found
+   *   Alt non_empty_list meaning that the subexpressions are separated by '|'
+   *   Seq non_empty_list meaning that the subexpressions are separated by ','
+   * - rparen_entid is the value carried by the closing parenthesis token
+   *
+   * The rule is right-recursive: every "|" or "," item prepends its
+   * subexpression to the result of the remaining input. Mixing "|" and ","
+   * within one parenthesized group raises WF_error.
+   */
+  entid:Rparen
+    {{ (Ignore, Alt []), entid }}
+| entid:RparenPlus
+    {{ (Plus, Alt []), entid }}
+| entid:RparenStar
+    {{ (Star, Alt []), entid }}
+| entid:RparenQmark
+    {{ (Qmark, Alt []), entid }}
+| Bar Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+    {{
+      let (finalmark, subexpr), rparen_entid = factor in
+      begin match subexpr with
+          Alt [] -> (finalmark, (Alt [re])), rparen_entid
+        | Alt alt -> (finalmark, (Alt (re :: alt))), rparen_entid
+        | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+      end
+    }}
+    ? {{ raise(WF_error("Bad content model expression")) }}
+| Comma Ignore* re:cp() Ignore* factor:choice_or_seq_factor()
+    {{
+      let (finalmark, subexpr), rparen_entid = factor in
+      begin match subexpr with
+          Alt [] -> (finalmark, (Seq [re])), rparen_entid
+        | Seq seq -> (finalmark, (Seq (re :: seq))), rparen_entid
+        | _ -> raise(WF_error("It is not allowed to mix alternatives and sequences"))
+      end
+    }}
+    ? {{ raise(WF_error("Bad content model expression")) }}
+
+cp():
+  /* parse either a name, or a parenthesized subexpression "(...)".
+   * A name may be followed by one of the multipliers +, *, or ?, which
+   * wraps the Child node into Repeated1, Repeated, or Optional.
+   * For a parenthesized subexpression the value carried by the Lparen
+   * token is passed on to choice_or_seq, and its result is returned.
+   */
+  name:Name m:multiplier()?
+    {{ match m with
+          None -> Child name
+        | Some Plus -> Repeated1 (Child name)
+        | Some Star -> Repeated (Child name)
+        | Some Qmark -> Optional (Child name)
+        | _ -> assert false
+    }}
+    ? {{ raise(WF_error("Bad content model expression")) }}
+| entid:Lparen Ignore* m:choice_or_seq(entid)
+    {{ m }}
+    ? {{ raise(WF_error("Bad content model expression")) }}
+
+
+/********************* ATTRIBUTE LIST DECLARATION ***********************/
+
+attlistdecl():
+  /* parses <!ATTLIST ... >. Enters the attribute list in dtd as side-
+   * effect.
+   *
+   * Raises Validation_error if the opening and the closing token of the
+   * declaration do not carry the same entity value, and WF_error on
+   * syntax errors. The DTD is only modified if extend_dtd is true.
+   */
+  decl_attlist_entid: Decl_attlist
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws1: Ignore Ignore*
+  el_name: Name
+  ws: Ignore? Ignore*
+  factor: attdef_factor()
+    {{
+      let at_list, decl_rangle_entid = factor in
+
+      if decl_attlist_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with ATTLIST declaration");
+
+      (* ws tells whether white space followed the element name *)
+      if not ws && at_list <> [] then begin
+        match at_list with
+            (name,_,_) :: _ ->
+              (* This is normally impossible, because the lexer demands
+               * some other token between two names.
+               *)
+              raise(WF_error("Whitespace is missing before `" ^ name ^ "'"));
+          | _ -> assert false
+      end;
+
+      if extend_dtd then begin
+        let new_el = new dtd_element dtd el_name in
+        (* Note that it is allowed that <!ATTLIST...> precedes the corresponding
+         * <!ELEMENT...> declaration. In this case we add the element declaration
+         * already to the DTD but leave the content model unspecified.
+         *)
+        let el =
+          try
+            dtd # add_element new_el;
+            new_el
+          with
+              Not_found -> (* already added *)
+                let old_el = dtd # element el_name in
+                if old_el # attribute_names <> [] then
+                  config.warner # warn ("More than one ATTLIST declaration for element type `" ^
+                                        el_name ^ "'");
+                old_el
+        in
+        (* extdecl was sampled right after the Decl_attlist token *)
+        List.iter
+          (fun (a_name, a_type, a_default) ->
+             el # add_attribute a_name a_type a_default extdecl)
+          at_list
+      end
+    }}
+    ? {{ match !yy_position with
+          "ws1" -> raise(WF_error("Whitespace is missing after ATTLIST"))
+        | "el_name" -> raise(WF_error("The name of the element is expected here"))
+        | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
+        | _ -> raise(WF_error("Bad attribute declaration"))
+      }}
+
+
+attdef_factor():
+  /* parses a list of triples <name> <type> <default value> and returns the
+   * list as (string * att_type * att_default) list.
+   * More precisely, the result is the pair (list, entid), where entid is
+   * the value carried by the Decl_rangle token that terminates the list.
+   * White space is required between two consecutive triples.
+   */
+  attdef:attdef() ws:Ignore? Ignore* factor:attdef_factor()
+    {{
+      let attdef_rest, decl_rangle_entid = factor in
+      (* ws tells whether white space followed the current triple *)
+      if not ws && attdef_rest <> [] then begin
+        match attdef_rest with
+            (name,_,_) :: _ ->
+              raise(WF_error("Missing whitespace before `" ^ name ^ "'"));
+          | _ -> assert false
+      end;
+      (attdef :: attdef_rest), decl_rangle_entid }}
+    ? {{ match !yy_position with
+        | "factor" -> raise(WF_error("Another attribute name or `>' expected"))
+        | _ -> raise(WF_error("Bad attribute declaration"))
+      }}
+| entid:Decl_rangle
+    {{ [], entid }}
+
+
+attdef():
+  /* Parses a single triple: attribute name, attribute type, and default
+   * declaration, separated by mandatory white space. Returns
+   * (name, tp, default).
+   */
+  name: Name
+  ws1: Ignore Ignore*
+  tp: atttype()
+  ws2: Ignore Ignore*
+  default: defaultdecl()
+    {{ (name,tp,default) }}
+    ? {{ match !yy_position with
+          ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+        | "tp" -> raise(WF_error("Type of attribute or `(' expected"))
+        | "default" -> raise(WF_error("#REQUIRED, #IMPLIED, #FIXED or a string literal expected"))
+        | _ -> raise(WF_error("Bad attribute declaration"))
+      }}
+
+atttype():
+  /* Parses an attribute type and returns it as att_type.
+   * First alternative: a keyword read as Name. Only after NOTATION the
+   * follow-up rule is the notation rule; for every other name the
+   * always-failing rule is selected, so the optional follow-up yields None.
+   * Second alternative: an enumeration "(tok|tok|...)", returned as A_enum.
+   */
+  name: Name
+  $ {{ let followup =
+         if name = "NOTATION" then
+           parse_notation
+         else
+           parse_never
+       in
+     }}
+  nota: [followup]()?
+    {{
+      match name with
+          "CDATA" -> A_cdata
+        | "ID" -> A_id
+        | "IDREF" -> A_idref
+        | "IDREFS" -> A_idrefs
+        | "ENTITY" -> A_entity
+        | "ENTITIES" -> A_entities
+        | "NMTOKEN" -> A_nmtoken
+        | "NMTOKENS" -> A_nmtokens
+        | "NOTATION" ->
+            (match nota with
+                 None -> raise(WF_error("Error in NOTATION type (perhaps missing whitespace after NOTATION?)"))
+               | Some n -> n
+            )
+        | _ -> raise(WF_error("One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected"))
+    }}
+    ? {{ raise(WF_error("Bad attribute declaration (perhaps missing whitespace after NOTATION)")) }}
+
+| Lparen
+  Ignore*
+  name: name_or_nametoken()
+  Ignore*
+  names: nmtoken_factor()*
+  rp: Rparen
+  /* Enumeration */
+    {{ A_enum(name :: names) }}
+    ? {{ match !yy_position with
+          "name" -> raise(WF_error("Name expected"))
+        | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | _ -> raise(WF_error("Bad enumeration type"))
+      }}
+
+
+never():
+  /* The always failing rule: the initial action raises Not_found before
+   * any token is inspected, so the Doctype symbol and the result value
+   * below are only placeholders.
+   * NOTE(review): presumably the generated parser treats Not_found as
+   * "this rule does not match" -- confirm against the parser generator.
+   */
+  $ {{ raise Not_found; }}
+  Doctype   /* questionable */
+    {{ A_cdata    (* Does not matter *)
+    }}
+
+
+notation():
+  /* Parses the part following the NOTATION keyword of an attribute type:
+   * mandatory white space, then "(<name>|<name>|...)". Returns
+   * A_notation with the list of names in source order.
+   */
+  Ignore Ignore*
+  lp: Lparen
+  Ignore*
+  name: Name
+  Ignore*
+  names: notation_factor()*
+  rp: Rparen
+    {{ A_notation(name :: names) }}
+    ? {{ match !yy_position with
+          "lp" -> raise(WF_error("`(' expected"))
+        | "name" -> raise(WF_error("Name expected"))
+        | "names" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | "rp" -> raise(WF_error("`|' and more names expected, or `)'"))
+        | _ -> raise(WF_error("Bad NOTATION type"))
+      }}
+
+
+notation_factor():
+  /* Parse "|<name>" and return the name. White space is permitted
+   * before and after the name.
+   */
+  Bar Ignore* name:Name Ignore*
+    {{ name }}
+    ? {{ match !yy_position with
+          "name" -> raise(WF_error("Name expected"))
+        | _ -> raise(WF_error("Bad NOTATION type"))
+      }}
+
+nmtoken_factor():
+  /* Parse "|<nmtoken>" and return the nmtoken. The token may be either
+   * a Name or a Nametoken (see name_or_nametoken); white space is
+   * permitted before and after it.
+   */
+  Bar Ignore* n:name_or_nametoken() Ignore*
+    {{ n }}
+    ? {{ match !yy_position with
+          "n" -> raise(WF_error("Nametoken expected"))
+        | _ -> raise(WF_error("Bad enumeration type"))
+      }}
+
+
+name_or_nametoken():
+  /* Accepts either a Name or a Nametoken token and returns the value
+   * carried by the token unchanged.
+   */
+  n:Name {{ n }}
+| n:Nametoken {{ n }}
+
+
+/* The default values must be expanded and normalized. This has been implemented
+ * by the function expand_attvalue.
+ */
+
+
+defaultdecl():
+  /* Parse the default value for an attribute and return it as att_default:
+   * - Required token             ==> D_required
+   * - Implied token              ==> D_implied
+   * - Fixed token, white space, and a string literal ==> D_fixed
+   * - a string literal alone     ==> D_default
+   * In the last two cases the literal is passed through expand_attvalue
+   * before it is stored.
+   */
+  Required
+    {{ D_required }}
+| Implied
+    {{ D_implied }}
+| Fixed ws:Ignore Ignore* str:Unparsed_string
+    {{ D_fixed (expand_attvalue lexerset dtd str config.warner false) }}
+    ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing after #FIXED"))
+        | "str" -> raise(WF_error("String literal expected"))
+        | _ -> raise(WF_error("Bad #FIXED default value"))
+      }}
+| str:Unparsed_string
+    {{ D_default (expand_attvalue lexerset dtd str config.warner false) }}
+
+
+/**************************** ENTITY DECLARATION ***********************/
+
+entitydecl(decl_entity_entid):
+  /* parses everything _after_ <!ENTITY until the matching >. The parsed
+   * entity declaration is entered into the dtd object as side-effect.
+   *
+   * decl_entity_entid is the entity value belonging to the opening of the
+   * declaration; it must be physically equal to the value carried by the
+   * closing Decl_rangle token, otherwise a Validation_error is raised.
+   * The first alternative handles general entities, the second one
+   * (beginning with Percent) parameter entities.
+   */
+  name: Name
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws: Ignore Ignore*
+  material: entitydef()
+  Ignore*
+  decl_rangle_entid: Decl_rangle
+  /* A general entity */
+    {{
+      if decl_entity_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with ENTITY declaration");
+      let en =
+        (* Distinguish between
+         * - internal entities
+         * - external entities
+         * - NDATA (unparsed) entities
+         *)
+        match material with
+            (Some s, None, None) ->
+              new internal_entity dtd name config.warner s p_internal_subset
+                config.errors_with_line_numbers false config.encoding
+          | (None, Some xid, None) ->
+              new external_entity (resolver # clone) dtd name config.warner
+                xid false config.errors_with_line_numbers
+                config.encoding
+
+          | (None, Some xid, Some n) ->
+              (new ndata_entity name xid n config.encoding :> entity)
+          | _ -> assert false
+      in
+      dtd # add_gen_entity en extdecl
+    }}
+    ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing"))
+        | "material" -> raise(WF_error("String literal or identifier expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _ -> raise(WF_error("Bad entity declaration"))
+      }}
+
+| Percent
+  $ {{ let extdecl = context.manager # current_entity_counts_as_external in
+    }}
+  ws1: Ignore Ignore*
+  name: Name
+  ws2: Ignore Ignore*
+  material: pedef()
+  Ignore*
+  decl_rangle_entid: Decl_rangle
+  /* A parameter entity */
+    {{
+      if decl_entity_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with ENTITY declaration");
+      let en =
+        (* Distinguish between internal and external entities *)
+        match material with
+            (Some s, None) ->
+              new internal_entity dtd name config.warner s p_internal_subset
+                config.errors_with_line_numbers true config.encoding
+          | (None, Some xid) ->
+              new external_entity (resolver # clone) dtd name config.warner
+                xid true config.errors_with_line_numbers
+                config.encoding
+          | _ -> assert false
+      in
+
+      (* The following two lines force that even internal entities count
+       * as external (for the standalone check) if the declaration of
+       * the internal entity occurs in an external entity.
+       *)
+      if extdecl then
+        en # set_counts_as_external;
+
+      dtd # add_par_entity en;
+    }}
+    ? {{ match !yy_position with
+          ("ws1"|"ws2") -> raise(WF_error("Whitespace is missing"))
+        | "material" -> raise(WF_error("String literal or identifier expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _ -> raise(WF_error("Bad entity declaration"))
+      }}
+
+
+entitydef():
+  /* parses the definition value of a general entity. Returns either:
+   * - (Some s, None, None) meaning the definition of an internal entity
+   *   with (literal) value s has been found
+   * - (None, Some x, None) meaning that an external parsed entity with
+   *   external ID x has been found
+   * - (None, Some x, Some n) meaning that an unparsed entity with
+   *   external ID x and notations n has been found
+   *
+   * If an NDATA part is present, it must be separated from the external
+   * ID by white space; otherwise WF_error is raised.
+   */
+  str:Unparsed_string
+    {{ Some str, None, None }}
+| id:external_id() ws:Ignore? Ignore* decl:ndatadecl()?
+    {{ if not ws && decl <> None then
+         raise(WF_error("Whitespace missing before `NDATA'"));
+       None, Some id, decl
+    }}
+
+
+pedef():
+  /* parses the definition value of a parameter entity. Returns either:
+   * - (Some s, None) meaning that the definition of an internal entity
+   *   with (literal) value s has been found
+   * - (None, Some x) meaning that an external ID x has been found
+   * Unlike entitydef, no NDATA part is accepted here.
+   */
+  str:Unparsed_string
+    {{ Some str, None }}
+| id:external_id()
+    {{ None, Some id }}
+
+
+ndatadecl():
+  /* Parses NDATA <name> and returns the name. The keyword is read as an
+   * ordinary Name token and compared with "NDATA" afterwards; any other
+   * name raises WF_error.
+   * The caller applies this rule optionally, and gets Some name if the
+   * rule matches and None otherwise.
+   */
+  ndata:Name ws:Ignore Ignore* name:Name
+    {{ if ndata = "NDATA" then
+         name
+       else
+         raise(WF_error("NDATA expected"))
+    }}
+    ? {{ match !yy_position with
+          "ws" -> raise(WF_error("Whitespace is missing after NDATA"))
+        | "name" -> raise(WF_error("Name expected"))
+        | _ -> raise(WF_error("Bad NDATA declaration"))
+      }}
+
+/**************************** NOTATION DECLARATION *******************/
+
+notationdecl():
+  /* parses <!NOTATION ... > and enters the notation declaration into the
+   * dtd object as side-effect
+   *
+   * The keyword SYSTEM or PUBLIC is read as an ordinary Name token and
+   * checked in the action. SYSTEM accepts exactly one string literal,
+   * PUBLIC one or two. The notation is only added to the DTD if
+   * extend_dtd is true. Raises Validation_error if the opening and the
+   * closing token do not carry the same entity value.
+   */
+  decl_notation_entid: Decl_notation
+  ws1: Ignore Ignore*
+  name: Name
+  ws2: Ignore Ignore*
+  sys_or_public: Name /* SYSTEM or PUBLIC */
+  ws3: Ignore Ignore*
+  str1: Unparsed_string
+  ws: Ignore? Ignore*
+  str2: Unparsed_string?
+  Ignore*
+  decl_rangle_entid: Decl_rangle
+    {{
+      if decl_notation_entid != decl_rangle_entid then
+        raise (Validation_error "Entities not properly nested with NOTATION declaration");
+      let xid =
+        (* Note that it is allowed that PUBLIC is only followed by one
+         * string literal
+         *)
+        match sys_or_public with
+            "SYSTEM" ->
+              if str2 <> None then raise(WF_error("SYSTEM must be followed only by one argument"));
+              System (recode_utf8 str1)
+          | "PUBLIC" ->
+              begin match str2 with
+                  None ->
+                    check_public_id str1;
+                    Public(recode_utf8 str1,"")
+                | Some p ->
+                    (* ws: white space between the two literals *)
+                    if not ws then
+                      raise(WF_error("Missing whitespace between the string literals of the `PUBLIC' id"));
+                    check_public_id str1;
+                    Public(recode_utf8 str1, recode_utf8 p)
+              end
+          | _ -> raise(WF_error("PUBLIC or SYSTEM expected"))
+      in
+      if extend_dtd then begin
+        let no = new dtd_notation name xid config.encoding in
+        dtd # add_notation no
+      end
+    }}
+    ? {{ match !yy_position with
+          ("ws1"|"ws2"|"ws3") -> raise(WF_error("Whitespace is missing"))
+        | "name" -> raise(WF_error("Name expected"))
+        | "sys_or_public" -> raise(WF_error("SYSTEM or PUBLIC expected"))
+        | ("str1"|"str2") -> raise(WF_error("String literal expected"))
+        | "decl_rangle_entid" -> raise(WF_error("`>' expected"))
+        | _ -> raise(WF_error("Bad NOTATION declaration"))
+      }}
+
+/****************************** ELEMENTS **************************/
+
+/* In the following rules, the number of error rules is reduced to
+ * improve the performance of the parser.
+ */
+
+
+contents_start():
+  /* parses <element>...</element> misc*, i.e. exactly one element followed
+   * optionally by white space or processing instructions.
+   * The element is entered into the global variables as follows:
+   * - If elstack is non-empty, the parsed element is added as new child to
+   *   the top element of the stack.
+   * - If elstack is empty, the root_exemplar object is modified rather than
+   *   that a new element is created. If additionally the variable root is
+   *   None, it is assigned Some root_exemplar.
+   *   Note that the modification of the root_exemplar is done by the method
+   *   internal_init.
+   * The reason why the root element is modified rather than newly created
+   * is a typing requirement. It must be possible that the class of the root
+   * is derived from the original class element_impl, i.e. the user must be
+   * able to add additional methods. If we created a new root object, we
+   * would have to denote to which class the new object belongs; the root
+   * would always be an 'element_impl' object (and not a derived object).
+   * If we instead cloned an exemplar object and modified it by the
+   * "create" method, the root object would belong to the same class as the
+   * exemplar (good), but the type of the parsing function would always
+   * state that an 'element_impl' was created (because we can pass the new
+   * object only back via a global variable). The only solution is to
+   * modify the object that has been passed to the parsing function directly.
+   *
+   * Before the first start tag is parsed, the DTD is replaced by the
+   * result of transform_dtd.
+   */
+  $ {{ dtd <- transform_dtd dtd; }}
+  start_tag() content()*
+    {{ () }}
+
+
+content():
+  /* parses: start tags, end tags, content, or processing
+   * instructions. That the tags are properly nested is dynamically checked.
+   * As result, recognized elements are added to their parent elements,
+   * content is added to the element containing it, and processing instructions
+   * are entered into the element embracing them. (All as side-effects.)
+   *
+   * Character references, entity references, and comments are accepted
+   * here, too. Every alternative returns unit; all work is done in the
+   * sub rules.
+   */
+  start_tag()
+    {{ () }}
+| end_tag()
+    {{ () }}
+| char_data()
+    {{ () }}
+| cref()
+    {{ () }}
+| pi()
+    {{ () }}
+| entity_ref()
+    {{ () }}
+| comment()
+    {{ () }}
+
+
+entity_ref():
+  /* Parses the material of a referenced entity, delimited by the
+   * Begin_entity token and the End_entity token consumed by the sub rule.
+   * Raises WF_error if no tag is open when the action runs, i.e. after
+   * the material of the entity has been processed.
+   */
+  Begin_entity eref_xmldecl_then_rest()
+    {{ if n_tags_open = 0 then
+         raise(WF_error("Entity reference not allowed here"))
+    }}
+
+
+/* See comment for doc_xmldecl_then_misc_then_prolog_then_rest. */
+
+eref_xmldecl_then_rest():
+  /* Parses the inner part of a referenced entity up to and including
+   * End_entity. Three cases are distinguished:
+   * - the entity begins with an XML declaration (PI_xml), which is passed
+   *   to process_xmldecl of the current entity;
+   * - the entity begins with other content;
+   * - the entity is empty.
+   * In the last two cases process_missing_xmldecl is invoked on the
+   * current entity before anything else is parsed.
+   */
+  pl:PI_xml
+  $ {{ context.manager # current_entity # process_xmldecl pl;
+    }}
+  content()* End_entity
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  content() content()* End_entity
+    {{ () }}
+
+| $ {{ context.manager # current_entity # process_missing_xmldecl; }}
+  End_entity
+    {{ () }}
+
+
+start_tag():
+  /* parses <element attribute-values> or <element attribute-values/>.
+   *
+   * EFFECT: If elstack is non-empty, the element is added to the
+   * top element of the stack as new child, and the element
+   * is pushed on the stack. If elstack is empty, the root_exemplar is
+   * modified and gets the parsed name and attribute list. The root_exemplar
+   * is pushed on the stack. If additionally the variable root is empty, too,
+   * this variable is initialized.
+   * If the <element ... /> form has been parsed, no element is pushed
+   * on the stack.
+   *
+   * Raises WF_error if white space is missing between two attributes or
+   * if a second toplevel element is found, and Validation_error if an ID
+   * attribute value is already contained in the ID index.
+   */
+  tag: Tag_beg
+  $ {{ let position =
+         if config.store_element_positions then
+           Some(context.manager # position)
+         else
+           None
+       in
+     }}
+  ws: Ignore? Ignore*
+  attlist: attribute()*
+  emptiness: start_tag_rangle()
+  /* Note: it is guaranteed that there is whitespace between Tag_beg and
+   * the name of the first attribute, because there must be some separator.
+   * So we need not to check ws!
+   */
+    {{
+      (* Every attribute is paired with a flag telling whether white space
+       * followed it. The flag must be set for all but the last attribute.
+       *)
+      let rec check_attlist al =
+        match al with
+            (nv1, num1) :: al' ->
+              if not num1 && al' <> [] then begin
+                match al with
+                    ((n1,_),_) :: ((n2,_),_) :: _ ->
+                      raise(WF_error("Whitespace is missing between attributes `" ^
+                                     n1 ^ "' and `" ^ n2 ^ "'"))
+                  | _ -> assert false
+              end;
+              check_attlist al'
+          | [] -> ()
+      in
+      check_attlist attlist;
+
+      let name, tag_beg_entid = tag in
+      (* Drop the white space flags *)
+      let attlist' = List.map (fun (nv,_) -> nv) attlist in
+      let d =
+        create_element_node ?position:position spec dtd name attlist' in
+
+      begin match id_index with
+          None -> ()
+        | Some idx ->
+            (* Put the ID attribute into the index, if present *)
+            begin try
+              let v = d # id_attribute_value in (* may raise Not_found *)
+              idx # add v d                     (* may raise ID_not_unique *)
+            with
+                Not_found ->
+                  (* No ID attribute *)
+                  ()
+              | ID_not_unique ->
+                  (* There is already an ID with the same value *)
+                  raise(Validation_error("ID not unique"))
+            end
+      end;
+
+      if n_tags_open = 0 then begin
+        if root = None then begin
+          (* We have found the begin tag of the root element. *)
+          if config.enable_super_root_node then begin
+            (* The user wants the super root instead of the real root.
+             * The real root element becomes the child of the VR.
+             *)
+            (* Assertion: self # current is the super root *)
+            assert (self # current # node_type = T_super_root);
+            root <- Some (self # current);
+            self # current # add_node d;
+            doc # init_root (self # current);
+          end
+          else begin
+            (* Normal behaviour: The user wants to get the real root. *)
+            root <- Some d;
+            doc # init_root d;
+          end;
+        end
+        else
+          (* We have found a second topmost element. This is illegal. *)
+          raise(WF_error("Document must consist of only one toplevel element"))
+      end
+      else begin
+        (* We have found some inner begin tag. *)
+        self # save_data;        (* Save outstanding data material first *)
+        self # current # add_node d
+      end;
+
+      if emptiness then
+        (* An empty tag like <a/>. *)
+        d # local_validate ~use_dfa:config.validate_by_dfa ()
+      else begin
+        (* A non-empty tag. *)
+        Stack.push (d, tag_beg_entid) elstack;
+        n_tags_open <- n_tags_open + 1;
+      end;
+    }}
+    ? {{ match !yy_position with
+          "attlist" -> raise(WF_error("Bad attribute list"))
+        | "emptiness" -> raise(WF_error("`>' or `/>' expected"))
+        | _ -> raise(WF_error("Bad start tag"))
+      }}
+
+
+attribute():
+  /* Parses name="value". Returns ((name, value), ws), where value is the
+   * result of attval, and ws tells whether white space follows the
+   * attribute.
+   */
+  n:Name Ignore* Eq Ignore* v:attval() ws:Ignore? Ignore*
+    {{ (n,v), ws }}
+
+
+attval():
+  /* Parses an attribute value literal and returns the result of
+   * expand_attvalue for it. The two token kinds differ only in the last
+   * argument passed to expand_attvalue: true for Attval, false for
+   * Attval_nl_normalized.
+   * NOTE(review): presumably the flag requests normalization of line
+   * breaks, which the second token kind has already undergone -- confirm
+   * against expand_attvalue.
+   */
+  v:Attval
+    {{ expand_attvalue lexerset dtd v config.warner true }}
+| v:Attval_nl_normalized
+    {{ expand_attvalue lexerset dtd v config.warner false }}
+
+
+start_tag_rangle():
+  /* Parses the end of a start tag. Returns false for Rangle and true
+   * for Rangle_empty; start_tag treats true as the empty-element form.
+   */
+  Rangle       {{ false }}
+| Rangle_empty {{ true }}
+
+
+end_tag():
+  /* parses </element>.
+   * Pops the top element from the elstack and checks if it is the same
+   * element.
+   *
+   * Raises WF_error if no tag is open, if the name differs from the name
+   * of the popped element, or if the start tag and the end tag do not
+   * carry the same entity value. After these checks the popped element
+   * is validated locally, and the counter of open tags is decremented.
+   */
+  tag:Tag_end Ignore* Rangle
+    {{ let name, tag_end_entid = tag in
+       if n_tags_open = 0 then
+         raise(WF_error("End-tag without start-tag"));
+
+       self # save_data;        (* Save outstanding data material first *)
+
+       let x, tag_beg_entid = Stack.pop elstack in
+       let x_name =
+         match x # node_type with
+           | T_element n -> n
+           | _ -> assert false
+       in
+       if name <> x_name then
+         raise(WF_error("End-tag does not match start-tag"));
+       (* Physical comparison of the entity values *)
+       if tag_beg_entid != tag_end_entid then
+         raise(WF_error("End-tag not in the same entity as the start-tag"));
+       x # local_validate ~use_dfa:config.validate_by_dfa ();
+
+       n_tags_open <- n_tags_open - 1;
+
+       assert (n_tags_open >= 0);
+
+    }}
+
+char_data():
+  /* Parses any literal characters not otherwise matching, and adds the
+   * characters to the top element of elstack.
+   * If elstack is empty, it is assumed that there is no surrounding
+   * element, and any non-white space character is forbidden.
+   * CDATA sections are collected in the same way, but they raise
+   * WF_error if no tag is open.
+   */
+  data:CharData
+    {{
+      if n_tags_open = 0 then
+        (* only white space is allowed *)
+        self # only_whitespace data
+      else
+        self # collect_data data
+          (* We collect the chardata material until the next end tag is
+           * reached. Then the collected material will be concatenated and
+           * stored as a single T_data node (see end_tag rule above)
+           * using save_data.
+           *)
+    }}
+| data:Cdata
+    {{
+      if n_tags_open = 0 then
+        raise (WF_error("CDATA section not allowed here"));
+      self # collect_data data
+          (* Also collect CDATA material *)
+    }}
+
+cref():
+  /* Parses &#...; and adds the character to the top element of elstack.
+   * The character code is converted by the function character, using the
+   * configured encoding, and collected like ordinary character data.
+   * Raises WF_error if no tag is open.
+   */
+  code:CRef
+    {{
+      if n_tags_open = 0 then
+        (* No surrounding element: character references are not allowed *)
+        raise(WF_error("Character reference not allowed here"));
+      self # collect_data (character config.encoding config.warner code)
+          (* Also collect character references *)
+    }}
+
+pi():
+  /* Parses <?...?> (but not <?xml white-space ... ?>).
+   * The processing instruction is stored as follows:
+   * - If no tag is open and the super root node is disabled, it is added
+   *   to the document object.
+   * - Otherwise, if config.enable_pinstr_nodes is set, it is wrapped into
+   *   a node of its own, which is added to the current node (after the
+   *   outstanding character data have been saved).
+   * - Otherwise it is added to the current node as processing
+   *   instruction.
+   */
+  pi: PI
+    {{
+      let position =
+        if config.store_element_positions then
+          Some(context.manager # position)
+        else
+          None
+      in
+      let target,value = pi in
+
+      (* Use && like the rest of the file; the operator & is deprecated *)
+      if n_tags_open = 0 && not config.enable_super_root_node
+      then
+        doc # add_pinstr (new proc_instruction target value config.encoding)
+      else begin
+        (* Special case: if processing instructions are processed inline,
+         * they are wrapped into T_pinstr nodes.
+         *)
+        if config.enable_pinstr_nodes then begin
+          self # save_data;        (* Save outstanding data material first *)
+          let pinstr = new proc_instruction target value config.encoding in
+          let wrapper = create_pinstr_node
+                          ?position:position spec dtd pinstr in
+          wrapper # local_validate();                (* succeeds always *)
+          self # current # add_node wrapper;
+        end
+        else
+          (* Normal behaviour: Add the PI to the parent element. *)
+          self # current # add_pinstr
+            (new proc_instruction target value config.encoding)
+      end
+    }}
+
+
+comment():
+  /* Parses <!-- ... -->
+   * If config.enable_comment_nodes is set, the pieces of comment material
+   * are concatenated, wrapped into a comment node, and this node is
+   * added to the current node (after the outstanding character data have
+   * been saved). Otherwise the comment is dropped.
+   * The position is only recorded if both comment nodes and element
+   * positions are enabled.
+   */
+  Comment_begin
+  $ {{
+      let position =
+        if config.enable_comment_nodes && config.store_element_positions then
+          Some(context.manager # position)
+        else
+          None
+      in
+    }}
+  mat: Comment_material*
+  ce: Comment_end
+    {{
+      if config.enable_comment_nodes then begin
+        self # save_data;        (* Save outstanding data material first *)
+        let comment_text = String.concat "" mat in
+        let wrapper = create_comment_node
+                        ?position:position spec dtd comment_text in
+        wrapper # local_validate();                (* succeeds always *)
+        self # current # add_node wrapper;
+      end
+    }}
+    ? {{ match !yy_position with
+        | "ce" -> raise(WF_error("`-->' expected"))
+        | _ -> raise(WF_error("Bad comment"))
+      }}
+
+
+%%
+ (* The method "parse" continues here... *)
+
+ try
+ match start_symbol with
+ Ext_document ->
+ parse_ext_document context.current context.get_next
+ | Ext_declarations ->
+ parse_ext_declarations context.current context.get_next
+ | Ext_element ->
+ parse_ext_element context.current context.get_next
+ with
+ Not_found ->
+ raise Parsing.Parse_error
+
+ (*********** The method "parse" ends here *************)
+
+
+(**********************************************************************)
+
+(* Here ends the class definition: *)
+end
+;;
+
+(**********************************************************************)
+
+open Pxp_reader;;
+
+
+class default_ext =
+  (* The minimal extension object: it only stores the node it is
+   * attached to. Reading the node before it has been set is considered
+   * a programming error (assertion failure).
+   *)
+  object(self)
+    val mutable node = (None : ('a extension node as 'a) option)
+
+    (* Returns a copy of this object *)
+    method clone = {< >}
+
+    (* Returns the stored node; fails if set_node was never called *)
+    method node =
+      match node with
+          Some attached -> attached
+        | None -> assert false
+
+    (* Stores the node this extension belongs to *)
+    method set_node attached =
+      node <- Some attached
+  end
+;;
+
+
+(* A ready-made instance of default_ext; used for all exemplars of default_spec *)
+let default_extension = new default_ext;;
+
+(* The default specification: every node kind is represented by the
+ * standard implementation classes (element_impl resp. data_impl) with
+ * default_extension as extension. The element mapping is an empty hash
+ * table, so no element type gets a special exemplar. Note that every
+ * exemplar is a separately created object.
+ *)
+let default_spec =
+  make_spec_from_mapping
+    ~super_root_exemplar:      (new element_impl default_extension)
+    ~comment_exemplar:         (new element_impl default_extension)
+    ~default_pinstr_exemplar:  (new element_impl default_extension)
+    ~data_exemplar:            (new data_impl default_extension)
+    ~default_element_exemplar: (new element_impl default_extension)
+    ~element_mapping:          (Hashtbl.create 1)
+    ()
+;;
+
+
+let idref_pass id_index root =
+  (* Walks the tree below root (including root) and checks for every
+   * attribute listed by idref_attribute_names that all its values can be
+   * found in id_index. The first unknown ID raises Validation_error; the
+   * exception is wrapped into At with a position prefix if the node has a
+   * line number different from 0.
+   *)
+  let unknown_id node att_name id =
+    (* Only element nodes are expected to have IDREF attributes *)
+    let el_name =
+      match node # node_type with
+          T_element n -> n
+        | _ -> assert false
+    in
+    let msg =
+      "Attribute `" ^ att_name ^ "' of element `" ^ el_name ^
+      "' refers to unknown ID `" ^ id ^ "'" in
+    let (ent, line, col) = node # position in
+    if line <> 0 then
+      raise(At("In entity " ^ ent ^ " at line " ^
+               string_of_int line ^ ", position " ^ string_of_int col ^
+               ":\n",
+               Validation_error msg))
+    else
+      raise(Validation_error msg)
+  in
+
+  (* Succeeds silently if id is contained in the index *)
+  let require_known node att_name id =
+    try ignore(id_index # find id) with
+        Not_found ->
+          unknown_id node att_name id
+  in
+
+  let rec walk node =
+    List.iter
+      (fun att_name ->
+         match node # attribute att_name with
+             Value id ->
+               require_known node att_name id
+           | Valuelist ids ->
+               List.iter (require_known node att_name) ids
+           | Implied_value -> ()
+      )
+      (node # idref_attribute_names);
+    List.iter walk (node # sub_nodes)
+  in
+  walk root
+;;
+
+
+exception Return_DTD of dtd;;
+  (* Used by extract_dtd_from_document_entity to jump out of the parser:
+   * the exception carries the DTD object. call_parser passes this
+   * exception through without wrapping it into At.
+   *)
+
+
+(* The common driver of all parse_* functions:
+ * - determines the resolver and the toplevel entity from the source. For
+ *   Entity sources the entity is made by the passed function; for ExtID
+ *   sources a document_entity or an external_entity is created, depending
+ *   on use_document_entity;
+ * - initializes the resolver and creates the parser object;
+ * - opens the entity, runs the parser from the grammar entry point
+ *   entry, and closes the entity again;
+ * - if cfg.idref_pass is set, and both an ID index and a root node exist,
+ *   runs idref_pass on the tree.
+ * Returns the parser object.
+ *
+ * Error handling: Return_DTD is re-raised unchanged after the entity has
+ * been closed. For any other exception the entity is closed, close_all
+ * is invoked on the resolver, and the exception is wrapped into At
+ * together with the position string of the entity manager.
+ *)
+let call_parser ~configuration:cfg
+                ~source:src
+                ~dtd
+                ~extensible_dtd
+                ~document:doc
+                ~specification:spec
+                ~process_xmldecl
+                ~transform_dtd
+                ~(id_index : 'ext #index option)
+                ~use_document_entity
+                ~entry
+                ~init_lexer =
+  let e = cfg.errors_with_line_numbers in
+  let w = cfg.warner in
+  (* r: the resolver; en: the toplevel entity *)
+  let r, en =
+    match src with
+        Entity(m,r')  -> r', m dtd
+      | ExtID(xid,r') -> r',
+                         if use_document_entity then
+                           new document_entity
+                             r' dtd "[toplevel]" w xid e
+                             cfg.encoding
+                         else
+                           new external_entity
+                             r' dtd "[toplevel]" w xid false e
+                             cfg.encoding
+  in
+  r # init_rep_encoding cfg.encoding;
+  r # init_warner w;
+  en # set_debugging_mode (cfg.debugging_mode);
+  let pobj =
+    new parser_object
+      doc
+      dtd
+      extensible_dtd
+      cfg
+      r
+      spec
+      process_xmldecl
+      transform_dtd
+      (id_index :> 'ext index option)
+  in
+  let mgr = new entity_manager en in
+  en # open_entity true init_lexer;
+  begin try
+    let context = make_context mgr in
+    pobj # parse context entry;
+    ignore(en # close_entity);
+  with
+      Return_DTD d ->
+        (* Not an error: only close the entity and pass the DTD on *)
+        ignore(en # close_entity);
+        raise(Return_DTD d)
+    | error ->
+        ignore(en # close_entity);
+        r # close_all;
+        let pos = mgr # position_string in
+        raise (At(pos, error))
+  end;
+  (* The IDREF check needs the complete tree, so it runs after parsing *)
+  if cfg.idref_pass then begin
+    match id_index with
+        None -> ()
+      | Some idx ->
+          ( match pobj # root with
+                None -> ()
+              | Some root ->
+                  idref_pass idx root;
+          )
+  end;
+  pobj
+
+
+let parse_dtd_entity cfg src =
+  (* Parse a DTD given as separate entity. A fresh DTD object is created
+   * and filled by the parser (grammar entry Ext_declarations). Afterwards
+   * the DTD is validated, and, if cfg.accept_only_deterministic_models is
+   * set, only_deterministic_models is invoked on it. Returns the DTD; the
+   * parser object and the document object are dropped.
+   *)
+  let dtd = new dtd cfg.warner cfg.encoding in
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg
+      ~source:src
+      ~dtd:dtd
+      ~extensible_dtd:true         (* Extend the DTD by parsed declarations *)
+      ~document:doc
+      ~specification:default_spec
+      ~process_xmldecl:false       (* The XML declaration is ignored
+                                    * (except 'encoding')
+                                    *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index: None
+      ~use_document_entity:false
+      ~entry:Ext_declarations      (* Entry point of the grammar *)
+      ~init_lexer:Declaration      (* The initially used lexer *)
+  in
+  dtd # validate;
+  if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+  dtd
+;;
+
+
+let parse_content_entity ?id_index cfg src dtd spec =
+  (* Parse an element given as separate entity, using an existing DTD.
+   * The DTD is validated (and optionally restricted to deterministic
+   * content models) before parsing starts. Returns the root node of the
+   * parsed tree; raises WF_error if the parser did not produce a root.
+   * If id_index is passed, it is handed over to the parser.
+   *)
+  dtd # validate;            (* ensure that the DTD is valid *)
+  if cfg.accept_only_deterministic_models then dtd # only_deterministic_models;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg
+      ~source:src
+      ~dtd:dtd
+      ~extensible_dtd:true         (* Extend the DTD by parsed declarations *)
+      ~document:doc
+      ~specification:spec
+      ~process_xmldecl:false       (* The XML declaration is ignored
+                                    * (except 'encoding')
+                                    *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index:(id_index :> 'ext index option)
+      ~use_document_entity:false
+      ~entry:Ext_element           (* Entry point of the grammar *)
+      ~init_lexer:Content          (* The initially used lexer *)
+  in
+  match pobj # root with
+      Some r -> r
+    | None -> raise(WF_error("No root element"))
+;;
+
+
+let parse_wfcontent_entity cfg src spec =
+  (* Parse an element given as separate entity in well-formedness mode:
+   * a fresh DTD is created on which allow_arbitrary is invoked, and the
+   * DTD is not extended by the parser. Returns the root node of the
+   * parsed tree; raises WF_error if the parser did not produce a root.
+   *)
+  let dtd = new dtd cfg.warner cfg.encoding in
+  dtd # allow_arbitrary;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg
+      ~source:src
+      ~dtd:dtd
+      ~extensible_dtd:false        (* Do not extend the DTD *)
+      ~document:doc
+      ~specification:spec
+      ~process_xmldecl:false       (* The XML declaration is ignored
+                                    * (except 'encoding')
+                                    *)
+      ~transform_dtd:(fun x -> x)  (* Do not transform the DTD *)
+      ~id_index:None
+      ~use_document_entity:false
+      ~entry:Ext_element           (* Entry point of the grammar *)
+      ~init_lexer:Content          (* The initially used lexer *)
+  in
+  match pobj # root with
+      Some r -> r
+    | None -> raise(WF_error("No root element"))
+;;
+
+
+let iparse_document_entity ?(transform_dtd = (fun x -> x))
+                           ?id_index
+                           cfg0 src spec p_wf =
+  (* Parse a complete document given as entity, and return the document
+   * object of the parser.
+   *)
+  (* p_wf: 'true' if in well-formedness mode, 'false' if in validating mode *)
+  (* In well-formedness mode the standalone declaration is never
+   * recognized, whatever cfg0 says.
+   *)
+  let cfg = { cfg0 with
+                recognize_standalone_declaration =
+                   cfg0.recognize_standalone_declaration && (not p_wf)
+            } in
+  let dtd = new dtd cfg.warner cfg.encoding in
+  if p_wf then
+    dtd # allow_arbitrary;
+  let doc = new document cfg.warner in
+  let pobj =
+    call_parser
+      ~configuration:cfg
+      ~source:src
+      ~dtd:dtd
+      ~extensible_dtd:(not p_wf)   (* Extend the DTD by parsed declarations
+                                    * only if in validating mode
+                                    *)
+      ~document:doc
+      ~specification:spec
+      ~process_xmldecl:true        (* The XML declaration is processed *)
+                                   (* TODO: change to 'not p_wf' ? *)
+      (* The user transformation runs first; the check for deterministic
+       * content models is applied to its result.
+       *)
+      ~transform_dtd:(fun dtd ->
+                        let dtd' = transform_dtd dtd in
+                        if cfg.accept_only_deterministic_models then
+                          dtd' # only_deterministic_models;
+                        dtd')
+
+      ~id_index:(id_index :> 'ext index option)
+      ~use_document_entity:true
+      ~entry:Ext_document          (* Entry point of the grammar *)
+      ~init_lexer:Document         (* The initially used lexer *)
+  in
+  pobj # doc
+;;
+
+
+(* Parses a complete document in validating mode: calls
+ * iparse_document_entity with p_wf = false. The optional arguments are
+ * passed through; the ID index is coerced to the plain index type.
+ *)
+let parse_document_entity ?(transform_dtd = (fun x -> x))
+                          ?id_index
+                          cfg src spec =
+  iparse_document_entity
+    ~transform_dtd:transform_dtd
+    ?id_index:(id_index : 'ext #index option :> 'ext index option)
+    cfg src spec false;;
+
+(* Parses a complete document in well-formedness mode: calls
+ * iparse_document_entity with p_wf = true and without optional arguments.
+ *)
+let parse_wfdocument_entity cfg src spec =
+  iparse_document_entity cfg src spec true;;
+
+let extract_dtd_from_document_entity cfg src =
+  (* Returns the DTD of a document without building the document tree:
+   * the DTD transformation hook aborts the parser by raising Return_DTD,
+   * which is caught here.
+   *)
+  let abort_with_dtd d = raise (Return_DTD d) in
+  try
+    (* Should not happen: the parser finished without calling the hook.
+     * In this case the DTD is taken from the parsed document.
+     *)
+    (parse_document_entity
+       ~transform_dtd:abort_with_dtd
+       cfg
+       src
+       default_spec) # dtd
+  with
+      Return_DTD extracted ->
+        (* The normal case: *)
+        extracted
+;;
+
+
+let default_config =
+  (* The default parser configuration. *)
+  { (* Warnings are dropped *)
+    warner = new drop_warnings;
+    (* Error messages carry line numbers *)
+    errors_with_line_numbers = true;
+    (* None of the optional node types is generated *)
+    enable_pinstr_nodes = false;
+    enable_super_root_node = false;
+    enable_comment_nodes = false;
+    (* Internal representation of character data *)
+    encoding = `Enc_iso88591;
+    recognize_standalone_declaration = true;
+    store_element_positions = true;
+    (* No second pass over IDREF/IDREFS attributes *)
+    idref_pass = false;
+    (* Validation strategy *)
+    validate_by_dfa = true;
+    accept_only_deterministic_models = true;
+    debugging_mode = false;
+  }
+
+
+class [ 'ext ] hash_index =
+object
+  (* An ID index backed by a hash table from ID strings to nodes. *)
+  constraint 'ext = 'ext node #extension
+
+  val ht = (Hashtbl.create 100 : (string, 'ext node) Hashtbl.t)
+
+  (* Registers node n under the ID s; an ID that is already present is
+   * rejected with ID_not_unique and the table is left unchanged.
+   *)
+  method add s n =
+    if Hashtbl.mem ht s then
+      raise ID_not_unique
+    else
+      Hashtbl.add ht s n
+
+  (* Looks up the node registered under s; raises Not_found if absent. *)
+  method find s = Hashtbl.find ht s
+
+  (* Gives access to the underlying table itself. *)
+  method index = ht
+end
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:29 lpadovan
+ * Initial revision
+ *
+ * Revision 1.14 2000/08/26 23:23:14 gerd
+ * Bug: from_file must not interpret the file name as URL path.
+ * Bug: When PI and comment nodes are generated, the collected data
+ * material must be saved first.
+ *
+ * Revision 1.13 2000/08/19 21:30:03 gerd
+ * Improved the error messages of the parser
+ *
+ * Revision 1.12 2000/08/18 20:16:25 gerd
+ * Implemented that Super root nodes, pinstr nodes and comment
+ * nodes are included into the document tree.
+ *
+ * Revision 1.11 2000/08/14 22:24:55 gerd
+ * Moved the module Pxp_encoding to the netstring package under
+ * the new name Netconversion.
+ *
+ * Revision 1.10 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.9 2000/07/14 13:57:29 gerd
+ * Added the id_index feature.
+ *
+ * Revision 1.8 2000/07/09 17:52:45 gerd
+ * New implementation for current_data.
+ * The position of elements is stored on demand.
+ *
+ * Revision 1.7 2000/07/09 01:00:35 gerd
+ * Improvement: It is now guaranteed that only one data node
+ * is added for consecutive character material.
+ *
+ * Revision 1.6 2000/07/08 16:27:29 gerd
+ * Cleaned up the functions calling the parser.
+ * New parser argument: transform_dtd.
+ * Implementations for 'extract_dtd_from_document_entity' and
+ * 'parse_wfcontent_entity'.
+ *
+ * Revision 1.5 2000/07/06 23:05:18 gerd
+ * Initializations of resolvers were missing.
+ *
+ * Revision 1.4 2000/07/06 22:11:01 gerd
+ * Fix: The creation of the non-virtual root element is protected
+ * in the same way as the virtual root element.
+ *
+ * Revision 1.3 2000/07/04 22:15:18 gerd
+ * Change: Using the new resolver capabilities.
+ * Still incomplete: the new extraction and parsing functions.
+ *
+ * Revision 1.2 2000/06/14 22:19:06 gerd
+ * Added checks such that it is impossible to mix encodings.
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.m2y:
+ *
+ * Revision 1.9 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.8 2000/05/27 19:26:19 gerd
+ * Change: The XML declaration is interpreted right after
+ * it has been parsed (no longer after the document): new function
+ * check_and_parse_xmldecl.
+ * When elements, attributes, and entities are declared
+ * it is stored whether the declaration happens in an external
+ * entity (for the standalone check).
+ * The option recognize_standalone_declaration is interpreted.
+ *
+ * Revision 1.7 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.6 2000/05/14 21:51:24 gerd
+ * Change: Whitespace is handled by the grammar, and no longer
+ * by the entity.
+ *
+ * Revision 1.5 2000/05/14 17:50:54 gerd
+ * Updates because of changes in the token type.
+ *
+ * Revision 1.4 2000/05/11 22:09:17 gerd
+ * Fixed the remaining problems with conditional sections.
+ * This seems to be also a weakness of the XML spec!
+ *
+ * Revision 1.3 2000/05/09 00:02:44 gerd
+ * Conditional sections are now recognized by the parser.
+ * There seem some open questions; see the TODO comments!
+ *
+ * Revision 1.2 2000/05/08 22:01:44 gerd
+ * Introduced entity managers (see markup_entity.ml).
+ * The XML declaration is now recognized by the parser. If such
+ * a declaration is found, the method process_xmldecl of the currently
+ * active entity is called. If the first token is not an XML declaration,
+ * the method process_missing_xmldecl is called instead.
+ * Some minor changes.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ *
+ * ======================================================================
+ *
+ * COPIED FROM REVISION 1.19 OF markup_yacc.mly
+ *
+ * Revision 1.19 2000/05/01 15:20:08 gerd
+ * "End tag matches start tag" is checked before "End tag in the
+ * same entity as start tag".
+ *
+ * Revision 1.18 2000/04/30 18:23:08 gerd
+ * Bigger change: Introduced the concept of virtual roots. First,
+ * this reduces the number of checks. Second, it makes it possible to
+ * return the virtual root to the caller instead of the real root (new
+ * config options 'virtual_root' and 'processing_instructions_inline').
+ * Minor changes because of better CR/CRLF handling.
+ *
+ * Revision 1.17 2000/03/13 23:47:46 gerd
+ * Updated because of interface changes. (See markup_yacc_shadow.mli
+ * rev. 1.8)
+ *
+ * Revision 1.16 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.15 1999/12/17 22:27:58 gerd
+ * Bugfix: The value of 'p_internal_subset' (an instance
+ * variable of the parser object) is set to true when the internal subset
+ * begins, and is set to false when this subset ends. The error was
+ * that references to external entities within this subset did not
+ * set 'p_internal_subset' to false; this is now corrected by introducing
+ * the 'p_internal_subset_stack'.
+ * This is a typical example of how the code gets more and
+ * more complicated and that it is very difficult to really understand
+ * what is going on.
+ *
+ * Revision 1.14 1999/11/09 22:23:37 gerd
+ * Removed the invocation of "init_dtd" of the root document.
+ * This method is no longer available. The DTD is also passed to the
+ * document object by the root element, so nothing essential changes.
+ *
+ * Revision 1.13 1999/10/25 23:37:09 gerd
+ * Bugfix: The warning "More than one ATTLIST declaration for element
+ * type ..." is only generated if an ATTLIST is found while there are already
+ * attributes for the element.
+ *
+ * Revision 1.12 1999/09/01 23:08:38 gerd
+ * New frontend function: parse_wf_document. This simply uses
+ * a DTD that allows anything, and by the new parameter "extend_dtd" it is
+ * avoided that element, attlist, and notation declarations are added to this
+ * DTD. The idea is that this function simulates a well-formedness parser.
+ * Tag_beg, Tag_end carry the entity_id. The "elstack" stores the
+ * entity_id of the stacked tag. This was necessary because otherwise some
+ * examples would produce incorrectly nested elements.
+ * p_internal_subset is a variable that stores whether the internal
+ * subset is being parsed. This is important because entity declarations in
+ * internal subsets are not allowed to contain parameter references.
+ * It is checked if the "elstack" is empty after all has been parsed.
+ * Processing instructions outside DTDs and outside elements are now
+ * added to the document.
+ * The rules of mixed and regexp style content models have been
+ * separated. The code is now much simpler.
+ * Entity references outside elements are detected and rejected.
+ *
+ * Revision 1.11 1999/09/01 16:26:08 gerd
+ * Improved the quality of error messages.
+ *
+ * Revision 1.10 1999/08/31 19:13:31 gerd
+ * Added checks on proper PE nesting. The idea is that tokens such
+ * as Decl_element and Decl_rangle carry an entity ID with them. This ID
+ * is simply an object of type < >, i.e. you can only test on identity.
+ * The lexer always produces tokens with a dummy ID because it does not
+ * know which entity is the current one. The entity layer replaces the dummy
+ * ID with the actual ID. The parser checks that the IDs of pairs such as
+ * Decl_element and Decl_rangle are the same; otherwise a Validation_error
+ * is produced.
+ *
+ * Revision 1.9 1999/08/15 20:42:01 gerd
+ * Corrected a misleading message.
+ *
+ * Revision 1.8 1999/08/15 20:37:34 gerd
+ * Improved error messages.
+ * Bugfix: While parsing document entities, the subclass document_entity is
+ * now used instead of external_entity. The rules in document entities are a bit
+ * stronger.
+ *
+ * Revision 1.7 1999/08/15 14:03:59 gerd
+ * Empty documents are not allowed.
+ * "CDATA section not allowed here" is a WF_error, not a Validation_
+ * error.
+ *
+ * Revision 1.6 1999/08/15 02:24:19 gerd
+ * Removed some grammar rules that were used for testing.
+ * Documents without DTD can now have arbitrary elements (formerly
+ * they were not allowed to have any element).
+ *
+ * Revision 1.5 1999/08/14 22:57:20 gerd
+ * It is allowed that external entities are empty because the
+ * empty string is well-parsed for both declarations and contents. Empty
+ * entities can be referenced anywhere because the references are replaced
+ * by nothing. Because of this, the Begin_entity...End_entity brace is only
+ * inserted if the entity is non-empty. (Otherwise references to empty
+ * entities would not be allowed anywhere.)
+ * As a consequence, the grammar has been changed such that a
+ * single Eof is equivalent to Begin_entity,End_entity without content.
+ *
+ * Revision 1.4 1999/08/14 22:20:01 gerd
+ * The "config" slot has now a component "warner" which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ * Some Parse_error exceptions have been changed into Validation_error.
+ * The interfaces of functions/classes imported from other modules
+ * have changed; the invocations have been adapted.
+ * Contents may contain CDATA sections that have been forgotten.
+ *
+ * Revision 1.3 1999/08/11 15:00:41 gerd
+ * The Begin_entity ... End_entity brace is also possible in
+ * 'contents'.
+ * The configuration passed to the parsing object contains always
+ * the resolver that is actually used.
+ *
+ * Revision 1.2 1999/08/10 21:35:12 gerd
+ * The XML/encoding declaration at the beginning of entities is
+ * evaluated. In particular, entities have now a method "xml_declaration"
+ * which returns the name/value pairs of such a declaration. The "encoding"
+ * setting is interpreted by the entity itself; "version", and "standalone"
+ * are interpreted by Markup_yacc.parse_document_entity. Other settings
+ * are ignored (this does not conform to the standard; the standard prescribes
+ * that "version" MUST be given in the declaration of document; "standalone"
+ * and "encoding" CAN be declared; no other settings are allowed).
+ * TODO: The user should be warned if the standard is not exactly
+ * fulfilled. -- The "standalone" property is not checked yet.
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ * PXP: The polymorphic XML parser for Objective Caml.
+ * Copyright by Gerd Stolpmann. See LICENSE for details.
+ *)
+
+
+(*$ markup-yacc.mli *)
+
+open Pxp_types
+open Pxp_dtd
+open Pxp_document
+
+(* Raised by the 'add' method of an ID index when the ID value to add is
+ * already present in the index (see the class type 'index' below).
+ *)
+exception ID_not_unique
+
+class type [ 'ext ] index =
+object
+ (* The type of indexes over the ID attributes of the elements. This type
+ * is the minimum requirement needed by the parser to create such an index:
+ * any object providing 'add' and 'find' with these types can serve as
+ * an index.
+ *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* Add the passed node to the index under the passed ID string. If
+ * there is already an ID with the passed string value, the exception
+ * ID_not_unique should be raised. (But the index is also free to
+ * accept several identical IDs.)
+ *)
+ method find : string -> 'ext node
+ (* Finds the node with the passed ID value, or raises Not_found *)
+end
+;;
+
+
+class [ 'ext ] hash_index :
+object
+ (* This is a simple implementation of 'index' using a hash table that
+ * maps ID strings to nodes.
+ *)
+ constraint 'ext = 'ext node #extension
+ method add : string -> 'ext node -> unit
+ (* Adds the node under the passed ID. If the ID is already present,
+ * ID_not_unique is raised and the table is left unchanged.
+ *)
+ method find : string -> 'ext node
+ (* Returns the node stored under the passed ID, or raises Not_found. *)
+ method index : (string, 'ext node) Hashtbl.t
+ (* Returns the hash table itself (not a copy). *)
+end
+;;
+
+
+(* The parser configuration: a record of options controlling warnings,
+ * error reporting, which optional node types are generated, the internal
+ * character encoding, and the validation strategy. See default_config
+ * below for a ready-made value.
+ *)
+type config =
+ { warner : collect_warnings;
+ (* An object that collects warnings. *)
+
+ errors_with_line_numbers : bool;
+ (* Whether error messages contain line numbers or not. The parser
+ * is 10 to 20 per cent faster if line numbers are turned off;
+ * you get only byte positions in this case.
+ *)
+
+ enable_pinstr_nodes : bool;
+ (* true: turns a special mode for processing instructions on. Normally,
+ * you cannot determine the exact location of a PI; you only know
+ * in which element the PI occurs. This mode makes it possible
+ * to find the exact location out: Every PI is artificially wrapped
+ * by a special node with type T_pinstr. For example, if the XML text
+ * is <a><?x?><?y?></a>, the parser normally produces only an element
+ * object for "a", and puts the PIs "x" and "y" into it (without
+ * order). In this mode, the object "a" will contain two objects
+ * with type T_pinstr, and the first object will contain "x", and the
+ * second "y": the object tree looks like
+ * - Node with type = T_element "a"
+ *   - Node with type = T_pinstr "x"
+ *     + contains processing instruction "x"
+ *   - Node with type = T_pinstr "y"
+ *     + contains processing instruction "y"
+ *
+ * Notes:
+ * (1) In past versions of PXP this mode was called
+ *     processing_instructions_inline, and it produced nodes of
+ *     type T_element "-pi" instead of T_pinstr.
+ * (2) The T_pinstr nodes are created from the pinstr exemplars
+ *     in your spec
+ *)
+
+ enable_super_root_node : bool;
+ (* true: the topmost element of the XML tree is not the root element,
+ * but the so-called super root. The root element is a son of the
+ * super root. The super root is a node with type T_super_root.
+ * The following behaviour changes, too:
+ * - PIs occurring outside the root element and outside the DTD are
+ *   added to the super root instead of the document object
+ * - If enable_pinstr_nodes is also turned on, the PI wrappers
+ *   are added to the super root
+ *
+ * For example, the document
+ *   <?x?><a>y</a><?y?>
+ * is normally represented by:
+ * - document object
+ *   + contains PIs x and y
+ *   - reference to root node with type = T_element "a"
+ *     - node with type = T_data: contains "y"
+ * With enabled super root node:
+ * - document object
+ *   - reference to super root node with type = T_super_root
+ *     + contains PIs x and y
+ *     - root node with type = T_element "a"
+ *       - node with type = T_data: contains "y"
+ * If also enable_pinstr_nodes:
+ * - document object
+ *   - reference to super root node with type = T_super_root
+ *     - node with type = T_pinstr "x"
+ *       + contains PI "x"
+ *     - root node with type = T_element "a"
+ *       - node with type = T_data: contains "y"
+ *     - node with type = T_pinstr "y"
+ *       + contains PI "y"
+ * Notes:
+ * (1) In previous versions of PXP this mode was called
+ *     virtual_root, and it produced an additional node of type
+ *     T_element "-vr" instead of T_super_root.
+ * (2) The T_super_root node is created from the super root exemplar
+ *     in your spec.
+ *)
+
+ enable_comment_nodes : bool;
+ (* When enabled, comments are represented as nodes with type =
+ * T_comment.
+ * To access the contents of comments, use the method "comment"
+ * for the comment nodes.
+ * These nodes behave like elements; however, they are normally
+ * empty and do not have attributes. Note that it is possible to
+ * add children to comment nodes and to set attributes, but it is
+ * strongly recommended not to do so. There are no checks on
+ * such abnormal use, because they would cost too
+ * much time, even when no comment nodes are generated at all.
+ *
+ * Comment nodes should be disabled unless you must parse a
+ * third-party XML text which uses comments as another data
+ * container.
+ *
+ * The nodes of type T_comment are created from the comment exemplars
+ * in your spec.
+ *)
+
+ encoding : rep_encoding;
+ (* Specifies the encoding used for the *internal* representation
+ * of any character data.
+ * Note that the default is still Enc_iso88591.
+ *)
+
+ recognize_standalone_declaration : bool;
+ (* Whether the "standalone" declaration is recognized or not.
+ * This option does not have an effect on well-formedness parsing:
+ * in this case such declarations are never recognized.
+ *
+ * Recognizing the "standalone" declaration means that the
+ * value of the declaration is scanned and passed to the DTD,
+ * and that the "standalone-check" is performed.
+ *
+ * Standalone-check: If a document is flagged standalone='yes'
+ * some additional constraints apply. The idea is that a parser
+ * without access to any external document subsets can still parse
+ * the document, and will still return the same values as the parser
+ * with such access. For example, if the DTD is external and if
+ * there are attributes with default values, it is checked that there
+ * is no element instance where these attributes are omitted - the
+ * parser would return the default value but this requires access to
+ * the external DTD subset.
+ *)
+
+ store_element_positions : bool;
+ (* Whether the file name, the line and the column of the
+ * beginning of elements are stored in the element nodes.
+ * This option may be useful to generate error messages.
+ *
+ * Positions are only stored for:
+ * - Elements
+ * - Wrapped processing instructions (see enable_pinstr_nodes)
+ * For all other node types, no position is stored.
+ *
+ * You can access positions by the method "position" of nodes.
+ *)
+
+ idref_pass : bool;
+ (* Whether the parser does a second pass and checks that all
+ * IDREF and IDREFS attributes contain valid references.
+ * This option works only if an ID index is available. To create
+ * an ID index, pass an index object as id_index argument to the
+ * parsing functions (such as parse_document_entity; see below).
+ *
+ * "Second pass" does not mean that the XML text is again parsed;
+ * only the existing document tree is traversed, and the check
+ * on bad IDREF/IDREFS attributes is performed for every node.
+ *)
+
+ validate_by_dfa : bool;
+ (* If true, and if DFAs are available for validation, the DFAs will
+ * actually be used for validation.
+ * If false, or if no DFAs are available, the standard backtracking
+ * algorithm will be used.
+ * DFA = deterministic finite automaton.
+ *
+ * DFAs are only available if accept_only_deterministic_models is
+ * "true" (because in this case, it is relatively cheap to construct
+ * the DFAs). DFAs are a data structure which ensures that validation
+ * can always be performed in linear time.
+ *
+ * I strongly recommend using DFAs; however, there are examples
+ * for which validation by backtracking is faster.
+ *)
+
+ accept_only_deterministic_models : bool;
+ (* Whether only deterministic content models are accepted in DTDs. *)
+
+ (* The following options are not implemented, or only for internal
+ * use.
+ *)
+
+ debugging_mode : bool;
+ }
+
+
+(* Where the XML text to parse comes from. Both constructors pair the
+ * origin with the resolver to use; see "Notes on sources" below for the
+ * meaning of Entity and ExtID and for the derived sources from_channel,
+ * from_string and from_file.
+ *)
+type source =
+ Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+ | ExtID of (ext_id * Pxp_reader.resolver)
+
+val from_channel :
+ ?system_encoding:encoding -> ?id:ext_id -> ?fixenc:encoding ->
+ in_channel -> source
+ (* A source reading the XML text from an in_channel.
+ * ~id: the external ID the channel is assumed to have; without it,
+ * relative SYSTEM IDs found in the document cannot be resolved.
+ * ~fixenc: a fixed encoding, replacing the automatic detection.
+ * ~system_encoding: presumably the encoding of file names, as for
+ * from_file -- TODO confirm.
+ * See "Notes on sources" below.
+ *)
+
+val from_string :
+ ?fixenc:encoding -> string -> source
+ (* A source reading the XML text from a string. External entities
+ * referenced in the string cannot be read.
+ * ~fixenc: a fixed encoding, replacing the automatic detection.
+ * See "Notes on sources" below.
+ *)
+
+val from_file :
+ ?system_encoding:encoding -> string -> source
+ (* A source reading the XML text from the file with the passed name.
+ * Relative SYSTEM IDs can be interpreted.
+ * ~system_encoding: the character encoding used for file names
+ * (UTF-8 by default).
+ * See "Notes on sources" below.
+ *)
+
+(* Notes on sources (version 2):
+ *
+ * Sources specify where the XML text to parse comes from. Sources not only
+ * represent character streams, but also external IDs (i.e. SYSTEM or PUBLIC
+ * names), and they are interpreted as a specific encoding of characters.
+ * A source should be associated with an external ID, because otherwise
+ * it is not known how to handle relative names.
+ *
+ * There are two primary sources, Entity and ExtID, and several functions
+ * for derived sources. First explanations for the functions:
+ *
+ * from_channel: The XML text is read from an in_channel. By default, the
+ * channel is not associated with an external ID, and it is impossible
+ * to resolve relative SYSTEM IDs found in the document.
+ * If the ?id argument is passed, it is assumed that the channel has this
+ * external ID. If relative SYSTEM IDs occur in the document, they can
+ * be interpreted; however, it is only possible to read from "file:"
+ * IDs.
+ * By default, the channel automatically detects the encoding. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_string: The XML text is read from a string.
+ * It is impossible to read from any external entity whose reference is found
+ * in the string.
+ * By default, the encoding of the string is detected automatically. You can
+ * set a fixed encoding by passing the ?fixenc argument.
+ *
+ * from_file: The XML text is read from the file whose file name is
+ * passed to the function (as UTF-8 string).
+ * Relative system IDs can be interpreted by this function.
+ * The ?system_encoding argument specifies the character encoding used
+ * for file names (sic!). By default, UTF-8 is assumed.
+ *
+ * Examples:
+ *
+ * from_file "/tmp/file.xml":
+ * reads from this file, which is assumed to have the ID
+ * SYSTEM "file://localhost/tmp/file.xml".
+ *
+ * let ch = open_in "/tmp/file.xml" in
+ * from_channel ~id:(System "file://localhost/tmp/file.xml") ch
+ * This does the same, but uses a channel.
+ *
+ * from_channel ~id:(System "http://host/file.xml")
+ * ch
+ * reads from the channel ch, and it is assumed that the ID is
+ * SYSTEM "http://host/file.xml". If there is any relative SYSTEM ID,
+ * it will be interpreted relative to this location; however, there is
+ * no way to read via HTTP.
+ * If there is any "file:" SYSTEM ID, it is possible to read the file.
+ *
+ * The primary sources:
+ *
+ * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the
+ * entity to read from is passed to the resolver, and the resolver finds
+ * the entity and opens it.
+ * The intention of this option is to allow customized
+ * resolvers to interpret external identifiers without any restriction.
+ * The Pxp_reader module contains several classes allowing the user to
+ * compose such a customized resolver from predefined components.
+ *
+ * ExtID is the interface of choice for own extensions to resolvers.
+ *
+ * - Entity(m,r): You can implement any behaviour by using a customized
+ * entity class. Once the DTD object d is known that will be used during
+ * parsing, the entity e = m d is determined and used together with the
+ * resolver r.
+ * This is only for hackers.
+ *)
+
+
+
+val default_config : config
+ (* The default configuration:
+ * - Warnings are thrown away
+ * - Error messages will contain line numbers
+ * - Neither T_super_root nor T_pinstr nor T_comment nodes are generated
+ * - The internal encoding is ISO-8859-1
+ * - The standalone declaration is checked
+ * - Element positions are stored
+ * - The IDREF pass is left out
+ * - If available, DFAs are used for validation
+ * - Only deterministic content models are accepted
+ * - The debugging mode is off
+ *)
+
+val default_extension : ('a node extension) as 'a
+ (* A "null" extension; an extension that does not extend the functionality *)
+
+val default_spec : ('a node extension as 'a) spec
+ (* Specifies that you do not want to use extensions. *)
+
+val parse_dtd_entity : config -> source -> dtd
+ (* Parse an entity containing a DTD (external subset), and return this DTD. *)
+
+val extract_dtd_from_document_entity : config -> source -> dtd
+ (* Parses a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and returns the DTD contained in the document.
+ * The parts of the document outside the DTD are actually not parsed,
+ * i.e. parsing stops when all declarations of the DTD have been read.
+ *
+ * Implementation note: the document is parsed in validating mode with
+ * default_spec, and parsing is aborted from within the transform_dtd
+ * hook. Consequently the only_deterministic_models step that
+ * parse_document_entity runs after that hook is not applied to the
+ * returned DTD.
+ *)
+
+val parse_document_entity :
+ ?transform_dtd:(dtd -> dtd) ->
+ ?id_index:('ext index) ->
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document, i.e. a document beginning with <!DOCTYPE...>,
+ * and validate the contents of the document against the DTD contained
+ * and/or referenced in the document.
+ *
+ * If the optional argument ~transform_dtd is passed, the following
+ * modification applies: After the DTD (both the internal and external
+ * subsets) has been parsed, the function ~transform_dtd is called,
+ * and the resulting DTD is actually used to validate the document.
+ *
+ * If the optional argument ~transform_dtd is missing, the parser
+ * behaves in the same way as if the identity were passed as ~transform_dtd.
+ *
+ * If the configuration has accept_only_deterministic_models set, the
+ * method only_deterministic_models is invoked on the DTD returned by
+ * ~transform_dtd.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfdocument_entity :
+ config -> source -> 'ext spec -> 'ext document
+ (* Parse a closed document (see parse_document_entity), but do not
+ * validate it. Only checks on well-formedness are performed.
+ *
+ * In this mode the DTD object is switched to allow_arbitrary,
+ * declarations found in the document do not extend the DTD, and the
+ * "standalone" declaration is never recognized, whatever the
+ * configuration says. There is no ~transform_dtd or ~id_index argument.
+ *)
+
+val parse_content_entity :
+ ?id_index:('ext index) ->
+ config -> source -> dtd -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document. The
+ * fragment must be a single element (i.e. something like <a>...</a>;
+ * not a sequence like <a>...</a><b>...</b>). The element is validated
+ * against the passed DTD, but it is not checked whether the element is
+ * the root element specified in the DTD.
+ *
+ * If the optional argument ~id_index is present, the parser adds
+ * any ID attribute to the passed index. An index is required to detect
+ * violations of the uniqueness of IDs.
+ *)
+
+val parse_wfcontent_entity :
+ config -> source -> 'ext spec -> 'ext node
+ (* Parse a file representing a well-formed fragment of a document
+ * (see parse_content_entity). The fragment is not validated, only
+ * checked for well-formedness.
+ *)
+
+
+(*$-*)
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:30 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/18 20:15:43 gerd
+ * Config options:
+ * - enable_super_root_nodes: new name for virtual_root
+ * - enable_pinstr_nodes: new name for processing_instructions_inline
+ * - enable_comment_nodes: new option
+ * Updated comments for various options.
+ *
+ * Revision 1.6 2000/07/23 02:16:33 gerd
+ * Support for DFAs.
+ *
+ * Revision 1.5 2000/07/14 13:57:29 gerd
+ * Added the id_index feature.
+ *
+ * Revision 1.4 2000/07/09 17:52:54 gerd
+ * New option store_element_positions.
+ *
+ * Revision 1.3 2000/07/08 16:26:21 gerd
+ * Added the signatures of the functions
+ * 'extract_dtd_from_document_entity' and 'parse_wfcontent_entity'.
+ * Updated the signature of 'parse_document_entity': New optional
+ * argument 'transform_dtd'.
+ * Updated the comments.
+ *
+ * Revision 1.2 2000/07/04 22:09:03 gerd
+ * MAJOR CHANGE: Redesign of the interface (not yet complete).
+ *
+ * Revision 1.1 2000/05/29 23:48:38 gerd
+ * Changed module names:
+ * Markup_aux into Pxp_aux
+ * Markup_codewriter into Pxp_codewriter
+ * Markup_document into Pxp_document
+ * Markup_dtd into Pxp_dtd
+ * Markup_entity into Pxp_entity
+ * Markup_lexer_types into Pxp_lexer_types
+ * Markup_reader into Pxp_reader
+ * Markup_types into Pxp_types
+ * Markup_yacc into Pxp_yacc
+ * See directory "compatibility" for (almost) compatible wrappers emulating
+ * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc.
+ *
+ * ======================================================================
+ * Old logs from markup_yacc.mli:
+ *
+ * Revision 1.4 2000/05/29 21:14:57 gerd
+ * Changed the type 'encoding' into a polymorphic variant.
+ *
+ * Revision 1.3 2000/05/27 19:24:01 gerd
+ * New option: recognize_standalone_declaration.
+ *
+ * Revision 1.2 2000/05/20 20:31:40 gerd
+ * Big change: Added support for various encodings of the
+ * internal representation.
+ *
+ * Revision 1.1 2000/05/06 23:21:49 gerd
+ * Initial revision.
+ *
+ * Revision 1.9 2000/04/30 18:23:38 gerd
+ * New config options 'processing_instructions_inline' and
+ * 'virtual_root'.
+ *
+ * Revision 1.8 2000/03/13 23:46:46 gerd
+ * Change: The 'resolver' component of the 'config' type has
+ * disappeared. Instead, there is a new resolver component in the Entity
+ * and ExtID values of 'source'. I hope that this makes clearer that the
+ * resolver has only an effect if used together with Entity and ExtID
+ * sources.
+ * Change: The Entity value can now return the entity dependent
+ * on the DTD that is going to be used.
+ *
+ * Revision 1.7 2000/02/22 02:32:02 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/02/22 01:52:45 gerd
+ * Added documentation.
+ *
+ * Revision 1.5 2000/01/20 20:54:43 gerd
+ * New config.errors_with_line_numbers.
+ *
+ * Revision 1.4 1999/09/01 23:09:10 gerd
+ * New function parse_wf_entity that simulates a well-formedness
+ * parser.
+ *
+ * Revision 1.3 1999/09/01 16:26:36 gerd
+ * Added an empty line. This is *really* a big change.
+ *
+ * Revision 1.2 1999/08/14 22:20:27 gerd
+ * The "config" slot has now a component "warner" which is
+ * an object with a "warn" method. This is used to warn about characters
+ * that cannot be represented in the Latin 1 alphabet.
+ * Furthermore, there is a new component "debugging_mode".
+ *
+ * Revision 1.1 1999/08/10 00:35:52 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+- Conditional sections:
+
+ Conditional_begin and Conditional_end must be in the same entity.
+
+- NDATA: check whether ENTITY attributes refer only to declared
+ NDATA entities
--- /dev/null
+.PHONY: all
+all:
+ $(MAKE) -C reader
+ $(MAKE) -C write
+ $(MAKE) -C codewriter
+ $(MAKE) -C canonxml
+ $(MAKE) -C negative
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+ $(MAKE) -C reader clean
+ $(MAKE) -C write clean
+ $(MAKE) -C codewriter clean
+ $(MAKE) -C canonxml clean
+ $(MAKE) -C negative clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f dumpfiles
+ $(MAKE) -C reader distclean
+ $(MAKE) -C write distclean
+ $(MAKE) -C codewriter distclean
+ $(MAKE) -C canonxml distclean
+ $(MAKE) -C negative distclean
+
+dumpfiles: dumpfiles.ml
+ ocamlc -o dumpfiles dumpfiles.ml
--- /dev/null
+----------------------------------------------------------------------
+(Anti) Regression tests
+----------------------------------------------------------------------
+
+- To build the tests, "markup" must already be compiled in ..
+ Do "make" to start the compilation.
+
+- To run the tests:
+ ./run
+
+- Program dumpfiles: Do "make dumpfiles" to create it.
+ It takes XML file names on the command line, and writes a Latex
+ document on stdout. The document shows the contents of all files.
+ EXAMPLE:
+ $ ./dumpfiles canonxml/data_jclark_valid/ext-sa/*.* >x.tex
+ $ latex x
--- /dev/null
+# make test_canonxml: make bytecode executable (also the default goal,
+#                     because it is the first target in this file)
+# make clean:         remove intermediate files (in this directory)
+# make CLEAN:         remove intermediate files (recursively)
+# make distclean:     remove any superfluous files (recursively)
+#
+# There is no native-code target in this Makefile.
+# Recipe lines must start with a tab character.
+#----------------------------------------------------------------------
+
+# Search path handed to ocamlfind for locating packages.
+# NOTE(review): the variable is not exported here, so it presumably reaches
+# ocamlfind only when OCAMLPATH is already present in the environment --
+# confirm.
+OCAMLPATH=../..
+
+# Compile and link the test driver as a custom bytecode executable with
+# debugging information.
+test_canonxml: test_canonxml.ml
+	ocamlfind ocamlc -g -custom -o test_canonxml -package .,str -linkpkg test_canonxml.ml
+
+#----------------------------------------------------------------------
+# "all" has no prerequisites and no recipe.
+.PHONY: all
+all:
+
+# Removes compiled files and the out.xml output file.
+.PHONY: clean
+clean:
+	rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
+
+# Same as "clean"; this directory has no subdirectories to recurse into
+# from here.
+.PHONY: CLEAN
+CLEAN: clean
+
+# Additionally removes editor backups and the test executable.
+.PHONY: distclean
+distclean: clean
+	rm -f *~
+	rm -f test_canonxml
--- /dev/null
+----------------------------------------------------------------------
+Regression test "canonxml":
+----------------------------------------------------------------------
+
+- An XML file is parsed, and the contents are printed in a canonical
+ format.
+
+- The output is compared with a reference file. The test is only
+ passed if the output and the reference are equal.
+
+- Test data "data_jclark_valid":
+ Contains the samples by James Clark that are valid. The subdirectories:
+ - sa: standalone documents
+ - not-sa: non-standalone document (with external DTD)
+ - ext-sa: non-standalone document (with other external entity)
+
+ Tests that are not passed have been moved into the *-problems directories.
+ The reason is typically that they use characters that are not in the
+ Latin 1 character set.
+
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "001.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+Data
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "002.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "003.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "004.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<e/><e/><e/>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ENTITY e SYSTEM "005.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+Data
+<e/>
+More data
+<e/>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA|e)*>
+<!ELEMENT e EMPTY>
+<!ENTITY e SYSTEM "006.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "007.ent">
+]>
+<doc>X&e;Z</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "008.ent">
+]>
+<doc>X&e;Z</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "009.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "010.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e PUBLIC "a not very interesting file" "011.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+&e4;
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 SYSTEM "012.ent">
+<!ENTITY e4 "&e5;">
+<!ENTITY e5 "(e5)">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e1;</doc>
--- /dev/null
+<e/>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (e)>
+<!ELEMENT e (#PCDATA)>
+<!ATTLIST e
+ a1 CDATA "a1 default"
+ a2 NMTOKENS "a2 default"
+>
+<!ENTITY x SYSTEM "013.ent">
+]>
+<doc>&x;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "014.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<doc>Data </doc>
\ No newline at end of file
--- /dev/null
+<doc>Data</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>Data </doc>
\ No newline at end of file
--- /dev/null
+<doc><e></e><e></e><e></e></doc>
\ No newline at end of file
--- /dev/null
+<doc>Data <e></e> More data <e></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc>XYZ</doc>
\ No newline at end of file
--- /dev/null
+<doc>XYZ</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>xyzzy </doc>
\ No newline at end of file
--- /dev/null
+<doc>(e5)</doc>
\ No newline at end of file
--- /dev/null
+<doc><e a1="a1 default" a2="a2 default"></e></doc>
\ No newline at end of file
--- /dev/null
+<doc>data</doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent" [
+<!ELEMENT doc EMPTY>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc SYSTEM "002.ent" [
+<!ELEMENT doc EMPTY>
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e SYSTEM "003-2.ent">
+<!ATTLIST doc a1 CDATA %e; "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "003-1.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e1 SYSTEM "004-2.ent">
+<!ENTITY % e2 "%e1;">
+%e1;
--- /dev/null
+<!ATTLIST doc a1 CDATA "value">
--- /dev/null
+<!DOCTYPE doc SYSTEM "004-1.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e SYSTEM "005-2.ent">
+%e;
--- /dev/null
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "005-1.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ATTLIST doc a1 CDATA "w1" a2 CDATA "w2">
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent" [
+<!ATTLIST doc a1 CDATA "v1">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc PUBLIC "whatever" "008.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc PUBLIC "whatever" "009.ent" [
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v2">
--- /dev/null
+<!DOCTYPE doc SYSTEM "010.ent" [
+<!ATTLIST doc a1 CDATA "v1">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "011.ent">
+%e;
+]>
+<doc></doc>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "012.ent">
+%e;
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![ INCLUDE [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "013.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![ %e; [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "014.ent" [
+<!ENTITY % e "INCLUDE">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![ %e; [
+<!ATTLIST doc a1 CDATA "v1">
+]]>
+<!ATTLIST doc a2 CDATA "v2">
--- /dev/null
+<!DOCTYPE doc SYSTEM "015.ent" [
+<!ENTITY % e "IGNORE">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![%e;[
+<!ATTLIST doc a1 CDATA "v1">
+]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "016.ent" [
+<!ENTITY % e "INCLUDE">
+]>
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<!ATTLIST doc a1 CDATA 'v1'>">
+%e;
--- /dev/null
+<!DOCTYPE doc SYSTEM "017.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "'v1'">
+<!ATTLIST doc a1 CDATA %e;>
--- /dev/null
+<!DOCTYPE doc SYSTEM "018.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "'v1'">
+<!ATTLIST doc a1 CDATA%e;>
--- /dev/null
+<!DOCTYPE doc SYSTEM "019.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "doc">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST%e;a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "020.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "doc a1 CDATA">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST %e; "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "021.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "INCLUDE[">
+<!ELEMENT doc (#PCDATA)>
+<![ %e; <!ATTLIST doc a1 CDATA "v1"> ]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "022.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "do">
+<!ENTITY % e2 "c">
+<!ENTITY % e3 "%e1;%e2;">
+<!ATTLIST %e3; a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "023.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "'v1'">
+<!ENTITY % e2 'a1 CDATA %e1;'>
+<!ATTLIST doc %e2;>
--- /dev/null
+<!DOCTYPE doc SYSTEM "024.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "x">
+<!ENTITY % e "y">
+<!ENTITY % v "'%e;'">
+<!ATTLIST doc a1 CDATA %v;>
--- /dev/null
+<!DOCTYPE doc SYSTEM "025.ent">
+<doc></doc>
--- /dev/null
+<!ATTLIST doc a1 CDATA "w1">
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc ANY>
+<!ENTITY % e SYSTEM "026.ent">
+%e;
+<!ATTLIST doc a1 CDATA "x1" a2 CDATA "x2">
+]>
+<doc></doc>
--- /dev/null
+<!ENTITY % e "">
+<!ELEMENT doc (#PCDATA %e;)>
--- /dev/null
+<!DOCTYPE doc SYSTEM "027.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![INCLUDE[<!ATTLIST doc a1 CDATA "v1">]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "028.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![IGNORE[<!ATTLIST doc a1 CDATA "v1">]]>
+<!ATTLIST doc a1 CDATA "v2">
--- /dev/null
+<!DOCTYPE doc SYSTEM "029.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![IGNORE[]]>
+<![INCLUDE[]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "030.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e SYSTEM "031-2.ent">
+<!ENTITY e "<![CDATA[%e;]]>">
--- /dev/null
+<!ATTLIST doc a1 CDATA "v1">
--- /dev/null
+<!DOCTYPE doc SYSTEM "031-1.ent">
+<doc>&e;</doc>
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="value"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="w2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="x"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="w1" a2="x2"></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><!ATTLIST doc a1 CDATA "v1"> </doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc ></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc >
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1 = "v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1='v1'></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc> </doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&amp;&lt;&gt;&quot;&apos;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc> </doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="v1" ></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED a2 CDATA #IMPLIED>
+]>
+<doc a1="v1" a2="v2"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc : CDATA #IMPLIED>
+]>
+<doc :="v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc _.-0123456789 CDATA #IMPLIED>
+]>
+<doc _.-0123456789="v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc abcdefghijklmnopqrstuvwxyz CDATA #IMPLIED>
+]>
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc ABCDEFGHIJKLMNOPQRSTUVWXYZ CDATA #IMPLIED>
+]>
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi?></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi some data ? > <??></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<foo>]]></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<&]]></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[<&]>]]]></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><!-- a comment --></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><!-- a comment ->--></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (foo)>
+<!ELEMENT foo (#PCDATA)>
+<!ENTITY e "<foo></foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo (#PCDATA)>
+]>
+<doc><foo/><foo></foo></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo EMPTY>
+]>
+<doc><foo/><foo></foo></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (foo*)>
+<!ELEMENT foo ANY>
+]>
+<doc><foo/><foo></foo></doc>
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<?xml version='1.0'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<?xml version = "1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<?xml version='1.0' encoding="UTF-8"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<?xml version='1.0' standalone='yes'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<?xml version='1.0' encoding="UTF-8" standalone='yes'?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc/>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc />
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+<?pi data?>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+<!-- comment -->
+
--- /dev/null
+<!-- comment -->
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
+
--- /dev/null
+<?pi data?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="&quot;&lt;&amp;&gt;&apos;"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+]>
+<doc a1="A"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>A</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1="foo
+bar"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ATTLIST e a1 CDATA "v1" a2 CDATA "v2" a3 CDATA #IMPLIED>
+]>
+<doc>
+<e a3="v3"/>
+<e a1="w1"/>
+<e a2="w2" a3="v3"/>
+</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
+<!ATTLIST doc a1 CDATA "z1">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "v1">
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>X
+Y</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>]</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>ð€€ô¿½</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<e/>">
+<!ELEMENT doc (e)>
+<!ELEMENT e EMPTY>
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+
+
+<doc
+></doc
+>
+
+
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<?pi data?>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>A</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a*)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1=" 1 2 "></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (e*)>
+<!ELEMENT e EMPTY>
+<!ATTLIST e a1 CDATA #IMPLIED a2 CDATA #IMPLIED a3 CDATA #IMPLIED>
+]>
+<doc>
+<e a1="v1" a2="v2" a3="v3"/>
+<e a1="w1" a2="v2"/>
+<e a1="v1" a2="w2" a3="v3"/>
+</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>X Y</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>£</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>เจมส์</doc>
--- /dev/null
+<!DOCTYPE เจมส์ [
+<!ELEMENT เจมส์ (#PCDATA)>
+]>
+<เจมส์></เจมส์>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>𐀀􏿽</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!-- 34 is double quote -->
+<!ENTITY e1 "&#34;">
+]>
+<doc a1="&e1;"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc> </doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e " ">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!NOTATION n PUBLIC "whatever">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e "<!ELEMENT doc (#PCDATA)>">
+%e;
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ID #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a IDREF #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a IDREFS #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITY #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITIES #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a NOTATION (n1|n2) #IMPLIED>
+<!NOTATION n1 SYSTEM "http://www.w3.org/">
+<!NOTATION n2 SYSTEM "http://www.w3.org/">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a (1|2) #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #REQUIRED>
+]>
+<doc a="v"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "v">
+]>
+<doc a="v"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "v">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a, b, c)>
+<!ELEMENT a (a?)>
+<!ELEMENT b (b*)>
+<!ELEMENT c (a | b)+>
+]>
+<doc><a/><b/><c><a/></c></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e SYSTEM "e.dtd">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e PUBLIC 'whatever' "e.dtd">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [<!ELEMENT doc (#PCDATA)>]><doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<foo>">
+<!ENTITY e "">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "">
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<foo/>">
+<!ELEMENT doc (foo)>
+<!ELEMENT foo EMPTY>
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "𐀀􏿽">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ATTLIST e a NOTATION (n) #IMPLIED>
+<!ELEMENT doc (e)*>
+<!ELEMENT e (#PCDATA)>
+<!NOTATION n PUBLIC "whatever">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n SYSTEM "http://www.w3.org/">
+<!ENTITY e SYSTEM "http://www.w3.org/" NDATA n>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a ENTITY "e">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a)*>
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+ <a/> <a/>
+
+
+</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
+
+
+</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e "foo">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 CDATA "%e;">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ATTLIST doc a1 CDATA #IMPLIED>
+<!ATTLIST doc a1 NMTOKENS #IMPLIED>
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc a1="1 2"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ATTLIST doc a1 NMTOKENS " 1 2 ">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!ATTLIST doc a2 CDATA #IMPLIED>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e SYSTEM "097.ent">
+<!ATTLIST doc a1 CDATA "v1">
+%e;
+<!ATTLIST doc a2 CDATA "v2">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><?pi x
+y?></doc>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e PUBLIC ";!*#@$_%" "100.xml">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "&#34;">
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="&#34;"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x	y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "
+">
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&e;y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a=""></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e " ">
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a="x&e;y"></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a NMTOKENS #IMPLIED>
+]>
+<doc a=" x  y "></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a | b)>
+<!ELEMENT a (#PCDATA)>
+]>
+<doc><a></a></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST e a CDATA #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<![CDATA[&foo;]]>">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "v">
+]>
+<doc>&e1;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc><![CDATA[
+]]></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY rsqb "]">
+]>
+<doc>]</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY rsqb "]]">
+]>
+<doc>]</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc ANY>
+]>
+<doc><!-- -á --></doc>
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc>&<>"'</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc :="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc _.-0123456789="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc abcdefghijklmnopqrstuvwxyz="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc ABCDEFGHIJKLMNOPQRSTUVWXYZ="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi ?></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi some data ? > <??></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><&</doc>
\ No newline at end of file
--- /dev/null
+<doc><&]>]</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc><?pi data?>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?pi data?><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1=""<&>'"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="A"></doc>
\ No newline at end of file
--- /dev/null
+<doc>A</doc>
\ No newline at end of file
--- /dev/null
+<doc a1="foo bar"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1" a2="v2"></doc>
\ No newline at end of file
--- /dev/null
+<doc>X Y</doc>
\ No newline at end of file
--- /dev/null
+<doc>]</doc>
\ No newline at end of file
--- /dev/null
+<doc>£</doc>
\ No newline at end of file
--- /dev/null
+<doc>เจมส์</doc>
\ No newline at end of file
--- /dev/null
+<เจมส์></เจมส์>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
--- /dev/null
+<doc><e></e></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?pi data?><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>A</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <e a1="v1" a2="v2" a3="v3"></e> <e a1="w1" a2="v2"></e> <e a1="v1" a2="w2" a3="v3"></e> </doc>
\ No newline at end of file
--- /dev/null
+<doc>X Y</doc>
\ No newline at end of file
--- /dev/null
+<doc>£</doc>
\ No newline at end of file
--- /dev/null
+<doc>เจมส์</doc>
\ No newline at end of file
--- /dev/null
+<เจมส์></เจมส์>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="""></doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="v"></doc>
\ No newline at end of file
--- /dev/null
+<doc><a></a><b></b><c><a></a></c></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></foo></doc>
\ No newline at end of file
--- /dev/null
+<doc><foo></doc>
\ No newline at end of file
--- /dev/null
+<doc>ð€€ô¿½ô¿¿</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="e"></doc>
\ No newline at end of file
--- /dev/null
+<doc> <a></a> <a></a>	<a></a> </doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc a1="%e;"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="1 2"></doc>
\ No newline at end of file
--- /dev/null
+<doc a1="v1"></doc>
\ No newline at end of file
--- /dev/null
+<doc><?pi x
+y?></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="""></doc>
\ No newline at end of file
--- /dev/null
+<doc><doc></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x	y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a=""></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc a="x y"></doc>
\ No newline at end of file
--- /dev/null
+<doc><a></a></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc>&foo;</doc>
\ No newline at end of file
--- /dev/null
+<doc>v</doc>
\ No newline at end of file
--- /dev/null
+<doc> </doc>
\ No newline at end of file
--- /dev/null
+<doc>]</doc>
\ No newline at end of file
--- /dev/null
+<doc>]]</doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ENTITY x PUBLIC "x" "" NDATA p>
+<!ENTITY y PUBLIC "x" "" NDATA p>
+<!ENTITY z PUBLIC "x" "" NDATA p>
+
+<!NOTATION p PUBLIC "image/gif">
+<!NOTATION q PUBLIC "image/jpeg">
+<!NOTATION r PUBLIC "image/png">
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ cdata CDATA #IMPLIED
+ id ID #IMPLIED
+ idref IDREF #IMPLIED
+ idrefs IDREFS #IMPLIED
+ entity ENTITY #IMPLIED
+ entities ENTITIES #IMPLIED
+ nmtoken NMTOKEN #IMPLIED
+ nmtokens NMTOKENS #IMPLIED
+ enum (a|b|c) #IMPLIED
+ notation NOTATION (p|q|r) #IMPLIED
+>
+
+<!ELEMENT any ANY>
+]>
+
+<any>
+ <el cdata="a b c"/>
+ <el cdata=" a b c "/>
+ <el cdata=" a b c "/>
+ <el id="A"/>
+ <el id=" B "/>
+ <el id=" C "/>
+ <el idref="C"/>
+ <el idref=" A "/>
+ <el idref=" B "/>
+ <el idrefs="A B C"/>
+ <el idrefs=" A B C "/>
+ <el idrefs=" A B C "/>
+ <el entity="x"/>
+ <el entity=" x "/>
+ <el entity=" x "/>
+ <el entities="x y z"/>
+ <el entities=" x y z "/>
+ <el entities=" x y z "/>
+ <el nmtoken="a"/>
+ <el nmtoken=" a "/>
+ <el nmtoken=" a "/>
+ <el nmtokens="a b c"/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens=" a b c "/>
+ <el enum="a"/>
+ <el enum=" a "/>
+ <el enum=" a "/>
+ <el notation="p"/>
+ <el notation=" p "/>
+ <el notation=" p "/>
+</any>
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ cdata CDATA #IMPLIED
+ nmtoken NMTOKEN #IMPLIED
+ nmtokens NMTOKENS #IMPLIED
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+ <el cdata="a
+b
+c d
+e "/>
+ <el nmtoken=" a "/>
+ <el nmtoken="
+a
+"/>
+ <el nmtoken="
+a
+"/>
+ <el nmtoken=" a "/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens="
+a
+b
+c
+"/>
+ <el nmtokens="
+a
+b
+c
+"/>
+ <el nmtokens=" a b c "/>
+</any>
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ cdata CDATA #IMPLIED
+ nmtoken NMTOKEN #IMPLIED
+ nmtokens NMTOKENS #IMPLIED
+>
+<!ELEMENT any ANY>
+
+<!ENTITY elinstance
+ '<el cdata="a
+b
+c d
+e "/>
+ <el nmtoken=" a "/>
+ <el nmtoken="
+a
+"/>
+ <el nmtoken="
+a
+"/>
+ <el nmtoken=" a "/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens="
+a
+b
+c
+"/>
+ <el nmtokens="
+a
+b
+c
+"/>
+ <el nmtokens=" a b c "/>'>
+]>
+
+<any>&elinstance;</any>
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ cdata CDATA #IMPLIED
+>
+<!ELEMENT any ANY>
+
+<!ENTITY elinstance
+ '<el cdata="a
+b"/>'>
+]>
+
+<any>&elinstance;</any>
+
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ cdata CDATA #IMPLIED
+ nmtoken NMTOKEN #IMPLIED
+ nmtokens NMTOKENS #IMPLIED
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+ <el cdata="a b c	d e "/>
+ <el nmtoken=" a "/>
+ <el nmtoken=" a "/>
+ <el nmtoken=" a "/>
+ <el nmtoken="	a	"/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens=" a b c "/>
+ <el nmtokens="	a	b	c	"/>
+</any>
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ nmtoken NMTOKEN #FIXED "a"
+ nmtokens NMTOKENS #FIXED "a b c"
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+ <el nmtoken="
+a
+"/>
+ <el nmtokens="
+a
+b
+c
+"/>
+</any>
--- /dev/null
+<?xml version="1.0"?>
+
+<!DOCTYPE any [
+
+<!ELEMENT el EMPTY>
+<!ATTLIST el
+ nmtoken NMTOKEN #FIXED "
+a
+"
+ nmtokens NMTOKENS #FIXED "a
+b
+c"
+>
+<!ELEMENT any ANY>
+]>
+
+<any>
+ <el nmtoken="
+a
+"/>
+ <el nmtokens="
+a
+b
+c
+"/>
+</any>
--- /dev/null
+001.xml tests whether additional white space in attribute value
+ is removed during normalization for every att type but
+ not for CDATA
+002.xml tests whether TABs, CRs, LFs, and CRLFs are converted
+ to spaces (only for CDATA, NMTOKEN, NMTOKENS)
+003.xml similar to 002.xml, but the attribute values occur
+ in internal entities
+004.xml tests whether CRLF normalization happens only once
+005.xml tests whether spaces, TABs, LFs, CRs, and CRLFs are correctly
+ processed if they are written as character references
+006.xml tests whether normalization is done before #FIXED comparison
+007.xml tests whether normalization is done before #FIXED comparison
+ (here the #FIXED values declared in the DTD contain line breaks, too)
--- /dev/null
+<any> <el cdata="a b c"></el> <el cdata=" a b c "></el> <el cdata=" a b c "></el> <el id="A"></el> <el id="B"></el> <el id="C"></el> <el idref="C"></el> <el idref="A"></el> <el idref="B"></el> <el idrefs="A B C"></el> <el idrefs="A B C"></el> <el idrefs="A B C"></el> <el entity="x"></el> <el entity="x"></el> <el entity="x"></el> <el entities="x y z"></el> <el entities="x y z"></el> <el entities="x y z"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el enum="a"></el> <el enum="a"></el> <el enum="a"></el> <el notation="p"></el> <el notation="p"></el> <el notation="p"></el> </any>
\ No newline at end of file
--- /dev/null
+<any> <el cdata="a b c d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> </any>
\ No newline at end of file
--- /dev/null
+<any><el cdata="a b c d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el></any>
\ No newline at end of file
--- /dev/null
+<any><el cdata="a b"></el></any>
\ No newline at end of file
--- /dev/null
+<any> <el cdata="a b c	d e "></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtoken="a"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> <el nmtokens="a b c"></el> </any>
\ No newline at end of file
--- /dev/null
+<any> <el nmtoken="a" nmtokens="a b c"></el> <el nmtoken="a" nmtokens="a b c"></el> </any>
\ No newline at end of file
--- /dev/null
+<any> <el nmtoken="a" nmtokens="a b c"></el> <el nmtoken="a" nmtokens="a b c"></el> </any>
\ No newline at end of file
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE schema [
+<!ELEMENT schema ANY>
+<!ELEMENT element ANY>
+<!ATTLIST element minOccurs CDATA #IMPLIED>
+]>
+<schema>
+ <element minOccurs='0'/>
+ <element minOccurs='0'/>
+</schema>
--- /dev/null
+<!DOCTYPE x [
+<!ELEMENT x ANY>
+]>
+<x/>
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<?pi 0?>
+]>
+<?pi 1?>
+<a>
+ <?pi 2?>
+ <a>
+ <?pi 3?>
+ </a>
+ <?pi 4?>
+</a>
+<?pi 5?>
\ No newline at end of file
--- /dev/null
+This directory contains real regression tests, i.e. it is tested whether
+reported bugs have been fixed.
+
+001.xml 2000-08-26: Haruo's single quote bug. Attvalues delimited
+ by single quotes did not work for the UTF-8 lexer.
+002+.xml 2000-08-26: Haruo's file-names-are-not-URLs bug. from_file
+ interpreted the file name as URL-encoded string. "002+.xml"
+ because the "+" must not be decoded as space.
+003.xml 2000-08-26: Alain's bug that data nodes must not be merged
+ where PI nodes are created. In the "comments" directory
+ there is another test for the case that comments delimit
+ data material
--- /dev/null
+<schema> 	<element minOccurs="0"></element> 	<element minOccurs="0"></element> </schema>
\ No newline at end of file
--- /dev/null
+<x></x>
\ No newline at end of file
--- /dev/null
+<?pi 1?><a> <?pi 2?> <a> <?pi 3?> </a> <?pi 4?> </a><?pi 5?>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!-- Comment 0 -->
+]>
+<!-- Comment 1 -->
+<a>
+ <!-- Comment -2 -->
+ <a>
+ <!-- Comment 3 -->
+ </a>
+ <!-- Comment 4 -->
+</a>
+<!-- Comment 5 -->
\ No newline at end of file
--- /dev/null
+001 Checks whether enable_comment_nodes works
--- /dev/null
+<!-- Comment 1 --><a> <!-- Comment -2 --> <a> <!-- Comment 3 --> </a> <!-- Comment 4 --> </a><!-- Comment 5 -->
\ No newline at end of file
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!ATTLIST doc att CDATA #REQUIRED>]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!-- Only a precondition check for test 003: the first ATTLIST counts -->
+<!ATTLIST doc att CDATA #IMPLIED>
+<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc SYSTEM "002.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![INCLUDE[<!ATTLIST doc att CDATA #IMPLIED>]]>
+<!ATTLIST doc att CDATA #REQUIRED>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "IGNORE">
+<![%e;[<!ATTLIST doc att CDATA #REQUIRED>]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "INCLUDE">
+<![%e;[<!ATTLIST doc att CDATA #IMPLIED>]]>
+<!ATTLIST doc att CDATA #REQUIRED>
--- /dev/null
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![IGNORE[This is illegal here]]>
+
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "]]>">
+<![IGNORE[%e;]]>
+
--- /dev/null
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!ENTITY e "]]>">]]>
+<![IGNORE[<!ENTITY e ']]>'>]]>
+
--- /dev/null
+<!DOCTYPE doc SYSTEM "008.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![IGNORE[<!-- ]]> -->]]>
+<![IGNORE[x <!-- ]]> -->]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "009.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![IGNORE[x <![IGNORE[xxx]]>]]>
+<![IGNORE[<![IGNORE[xxx]]>]]>
+<![IGNORE[x <![INCLUDE[xxx]]>]]>
+<![IGNORE[<![INCLUDE[xxx]]>]]>
--- /dev/null
+<!DOCTYPE doc SYSTEM "010.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc EMPTY>
+<![INCLUDE[ <![INCLUDE[ <!ATTLIST doc att CDATA #IMPLIED> ]]>
+ <![IGNORE[ xxx ]]>
+]]>
+<!ATTLIST doc att CDATA #REQUIRED>
+
--- /dev/null
+<!DOCTYPE doc SYSTEM "011.ent">
+<doc></doc>
--- /dev/null
+001 IGNORE works: <![IGNORE[ ... ]]>
+002 [precondition for 003] The first ATTLIST declaration for the same
+ attribute counts
+003 INCLUDE works: <![INCLUDE[ ... ]]>
+004 IGNORE works: <![%e;[ ... ]]> with e="IGNORE"
+005 INCLUDE works: <![%e;[ ... ]]> with e="INCLUDE"
+006 IGNORE works: <![IGNORE[ ... ]]> ignoring a section that would
+ be illegal
+007 Within ignored sections references to parameter entities are
+ not resolved.
+ NOTE: You cannot derive this directly from the XML spec because a
+ precise definition of what "ignoring" means is missing. This property
+ is an interpretation of the statement about reliable parsing in
+ section 3.4.
+008 Ignored sections may contain string literals containing "]]>".
+ NOTE: same problem with XML spec as 007
+009 Ignored sections may contain comments containing "]]>".
+ NOTE: same problem with XML spec as 007
+010 Nested conditional sections with outermost IGNORE
+011 Nested conditional sections with outermost INCLUDE
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a ANY>
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a ANY>
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b att1="1" att2=" 1 2 3 "/></a>
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a (b)>
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
--- /dev/null
+<!DOCTYPE a [
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a><b/></a>
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a ANY>
+ <!ENTITY x SYSTEM "sample" NDATA m>
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a/>
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a ANY>
+ <!ATTLIST a g ENTITY #IMPLIED>
+ <!ENTITY x SYSTEM "sample" NDATA m>
+ <?pxp:dtd optional-element-and-notation-declarations?>
+]>
+<a g="x"/>
--- /dev/null
+<!DOCTYPE a [
+ <!ELEMENT a ANY>
+ <?pxp:dtd optional-attribute-declarations elements="a"?>
+]>
+<a x="y"/>
--- /dev/null
+<?pxp:dtd optional-element-and-notation-declarations?>
+
+001.xml Whether it works for undeclared elements
+002.xml Whether it works for undeclared elements with attributes
+003.xml Whether it works for undeclared elements in declarations
+004.xml Whether it works for undeclared root elements
+005.xml Whether it works for undeclared notations
+006.xml Whether it works for undeclared notations which are actually
+ referred to
+
+<?pxp:dtd optional-attribute-declarations?>
+
+007.xml Whether it works
+
--- /dev/null
+<a><b></b></a>
\ No newline at end of file
--- /dev/null
+<a><b att1="1" att2=" 1 2 3 "></b></a>
\ No newline at end of file
--- /dev/null
+<a><b></b></a>
\ No newline at end of file
--- /dev/null
+<a><b></b></a>
\ No newline at end of file
--- /dev/null
+<a></a>
\ No newline at end of file
--- /dev/null
+<a g="x"></a>
\ No newline at end of file
--- /dev/null
+<a x="y"></a>
\ No newline at end of file
--- /dev/null
+#! /bin/bash
+
+check_dir () {
+ dir="$1"
+ shift
+ xmlfiles=`cd $dir && echo *.xml`
+ for file in $xmlfiles; do
+ echo -n "File $dir/$file: "
+ ./test_canonxml "$@" "$dir/$file" >out.xml
+ if cmp out.xml "$dir/out/$file"; then
+ echo "OK"
+ else
+ echo "NOT OK"
+ read
+ fi
+ done
+}
+
+check_dir "data_valid/conditional"
+check_dir "data_valid/att_normalization"
+check_dir "data_valid/optional_decls"
+check_dir "data_valid/comments" -comments
+check_dir "data_valid/bugfixes"
+
+#check_dir "data_jclark_valid/sa-problems"
+#check_dir "data_jclark_valid/ext-sa-problems"
+check_dir "data_jclark_valid/sa"
+check_dir "data_jclark_valid/not-sa"
+check_dir "data_jclark_valid/ext-sa"
+
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* Writes the text that [string_of_exn] yields for the exception [e] to
+ * stderr, followed by a newline.
+ *)
+let prerr_error e =
+  let message = string_of_exn e in
+  prerr_endline message
+;;
+
+(* Warning sink used in the parser configuration below: every warning text
+ * is written to stderr as one line with the prefix "WARNING: ".
+ *)
+class warner =
+  object
+    method warn msg =
+      let line = "WARNING: " ^ msg in
+      prerr_endline line
+  end
+;;
+
+(* Scratch string of 8192 bytes.  NOTE(review): nothing else in this program
+ * refers to it; it looks like a leftover and could probably be removed. *)
+let outbuf = String.create 8192;;
+
+(* Writes [s] to stdout as UTF-8.  [config.encoding] selects how [s] is
+ * interpreted: for `Enc_utf8 the string is printed unchanged; for
+ * `Enc_iso88591 every byte above 127 is expanded to the two-byte UTF-8
+ * sequence of the same code point.  Any other encoding aborts with an
+ * assertion failure.
+ *)
+let output_utf8 config s =
+  let print_latin1_byte ch =
+    let code = Char.code ch in
+    if code > 127 then begin
+      print_char (Char.chr (0xc0 lor (code lsr 6)));
+      print_char (Char.chr (0x80 lor (code land 0x3f)))
+    end
+    else
+      print_char ch
+  in
+  match config.encoding with
+      `Enc_utf8 ->
+        print_string s
+    | `Enc_iso88591 ->
+        let rec loop i =
+          if i < String.length s then begin
+            print_latin1_byte s.[i];
+            loop (i + 1)
+          end
+        in
+        loop 0
+    | _ -> assert false
+;;
+
+
+(* Matches every character that has to be written as a reference in
+ * canonical XML output. *)
+let re = Str.regexp "[&<>\"\009\010\013]";;
+
+(* Returns [s] with the characters &, <, >, double quote, TAB, LF and CR
+ * replaced by the entity or character references used in canonical XML
+ * output; all other characters are copied unchanged.
+ *)
+let escaped s =
+  Str.global_substitute
+    re
+    (fun _ ->
+       match Str.matched_string s with
+           "&"    -> "&amp;"
+         | "<"    -> "&lt;"
+         | ">"    -> "&gt;"
+         | "\""   -> "&quot;"
+         | "\009" -> "&#9;"
+         | "\010" -> "&#10;"
+         | "\013" -> "&#13;"
+         | _      -> assert false  (* re matches only the characters above *)
+    )
+    s
+;;
+
+
+(* Writes the tree below node [n] to stdout, using [output_utf8 config] for
+ * all output:
+ * - super root: only the subnodes are written
+ * - processing instruction: <?target value?>
+ * - element: start tag with the attributes sorted by name (implied
+ *   attributes are left out, list values are joined with single spaces,
+ *   values are passed through [escaped]), then the subnodes, then the end tag
+ * - data: the character data passed through [escaped]
+ * - comment: the comment text between the comment delimiters, not escaped
+ * Any other node type aborts with an assertion failure.
+ *)
+let rec output_xml config n =
+  let emit = output_utf8 config in
+  let emit_children () = n # iter_nodes (output_xml config) in
+  let emit_attribute attname v =
+    emit " ";
+    emit attname;
+    emit "=\"";
+    emit (escaped v);
+    emit "\""
+  in
+  match n # node_type with
+      T_super_root ->
+        emit_children ()
+    | T_pinstr pi_name ->
+        let [ pi ] = n # pinstr pi_name in
+        emit "<?";
+        emit (pi # target);
+        emit " ";
+        emit (pi # value);
+        emit "?>";
+    | T_element name ->
+        emit "<";
+        emit name;
+        let write_attribute attname =
+          match n # attribute attname with
+              Value v -> emit_attribute attname v
+            | Valuelist vl -> emit_attribute attname (String.concat " " vl)
+            | Implied_value -> ()
+        in
+        List.iter write_attribute (Sort.list ( <= ) (n # attribute_names));
+        emit ">";
+        emit_children ();
+        emit "</";
+        emit name;
+        emit ">";
+    | T_data ->
+        emit (escaped (n # data))
+    | T_comment ->
+        begin match n # comment with
+            Some text -> emit ("<!--" ^ text ^ "-->")
+          | None -> assert false
+        end
+    | _ ->
+        assert false
+;;
+
+
+(* Parses the XML file [filename] and writes the resulting tree to stdout
+ * via [output_xml].
+ * - debug: value for the debugging_mode field of the parser configuration
+ * - wf: if true, parse_wfdocument_entity is used; otherwise
+ *   parse_document_entity is called with a fresh hash_index as ID index
+ * - iso88591: if true, the encoding field is `Enc_iso88591, else `Enc_utf8
+ * - comments: value for the enable_comment_nodes field
+ * Any exception is reported with [prerr_error] and recorded in
+ * [error_happened]; it is not re-raised.
+ *)
+let parse debug wf iso88591 comments filename =
+ let spec =
+ (* one exemplar object is shared by all node kinds except data nodes *)
+ let e = new element_impl default_extension in
+ e # keep_always_whitespace_mode;
+ make_spec_from_mapping
+ ~super_root_exemplar: e
+ ~default_pinstr_exemplar: e
+ ~comment_exemplar: e
+ ~data_exemplar: (new data_impl default_extension)
+ ~default_element_exemplar: e
+ ~element_mapping: (Hashtbl.create 1)
+ ()
+ in
+ let config =
+ { default_config with
+ warner = new warner;
+ debugging_mode = debug;
+ enable_pinstr_nodes = true;
+ enable_super_root_node = true;
+ enable_comment_nodes = comments;
+ encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+ idref_pass = true;
+ }
+ in
+ try
+ (* choose the parser entry point; both branches are applied to the same
+ * remaining arguments (config, source, spec) below *)
+ let parse_fn =
+ if wf then parse_wfdocument_entity
+ else
+ let index = new hash_index in
+ parse_document_entity
+ ?transform_dtd:None
+ ~id_index:(index :> 'ext index)
+ in
+ let tree =
+ parse_fn
+ config
+ (from_file filename)
+ spec
+ in
+ output_xml config (tree # root)
+ with
+ e ->
+ (* remember the failure so that the program can exit with code 1 *)
+ error_happened := true;
+ prerr_error e
+;;
+
+
+(* Entry point: interprets the command line and calls [parse] for every
+ * file argument, in the order given on the command line.
+ *)
+let main() =
+  let debug = ref false
+  and wf = ref false
+  and iso88591 = ref false
+  and comments = ref false
+  and files = ref [] in
+  let options =
+    [ "-d", Arg.Set debug,
+      " turn debugging mode on";
+      "-wf", Arg.Set wf,
+      " check only on well-formedness";
+      "-iso-8859-1", Arg.Set iso88591,
+      " use ISO-8859-1 as internal encoding instead of UTF-8";
+      "-comments", Arg.Set comments,
+      " output comments, too";
+    ]
+  in
+  (* anonymous arguments are collected in reverse order *)
+  let add_file name = files := name :: !files in
+  Arg.parse
+    options
+    add_file
+    "
+usage: test_canonxml [options] file ...
+
+List of options:";
+  List.iter (parse !debug !wf !iso88591 !comments) (List.rev !files)
+;;
+
+
+(* Run the program; the exit code is 1 if at least one file caused an
+ * error (see error_happened). *)
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:32 lpadovan
+ * Initial revision
+ *
+ * Revision 1.8 2000/08/17 00:51:57 gerd
+ * Added -comments option to test enable_comment_nodes.
+ *
+ * Revision 1.7 2000/08/16 23:44:17 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.6 2000/07/14 14:56:55 gerd
+ * Updated: warner.
+ *
+ * Revision 1.5 2000/07/14 14:17:58 gerd
+ * Updated because of interface changes.
+ *
+ * Revision 1.4 2000/07/09 01:06:20 gerd
+ * Updated.
+ *
+ * Revision 1.3 2000/06/04 20:31:03 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/20 20:34:28 gerd
+ * Changed for UTF-8 support.
+ *
+ * Revision 1.1 2000/04/30 20:13:01 gerd
+ * Initial revision.
+ *
+ * Revision 1.3 1999/11/09 22:27:30 gerd
+ * The programs returns now an exit code of 1 if one of the
+ * XML files produces an error.
+ *
+ * Revision 1.2 1999/09/01 23:09:56 gerd
+ * Added the option -wf that switches to well-formedness checking
+ * instead of validation.
+ *
+ * Revision 1.1 1999/08/14 22:20:53 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+# make compile: make bytecode executable
+# (this Makefile defines no native-code target)
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+compile: compile.ml
+ ocamlfind ocamlc -g -custom -o compile -package .,str -linkpkg compile.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa sample sample.ml out1 out2
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f compile
+
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* Reports the exception [e] on stderr: prints the string computed by
+ * [string_of_exn] and a newline.
+ *)
+let prerr_error e =
+  let text = string_of_exn e in
+  prerr_endline text
+;;
+
+
+(* Object stored in the warner field of the parser configuration: each
+ * warning is printed on stderr as a line starting with "WARNING: ".
+ *)
+class warner =
+  object
+    method warn text =
+      let line = "WARNING: " ^ text in
+      prerr_endline line
+  end
+;;
+
+
+(* Parses the XML file [in_filename] with parse_document_entity and hands
+ * the resulting document to Pxp_codewriter.write_document, which writes to
+ * the file [out_filename]; afterwards one more line of O-Caml code is
+ * appended that calls create_document and writes the result to stdout.
+ * - print: if true, the parsed document is also written to stdout
+ * - super_root, pis, comments: values for the configuration fields
+ *   enable_super_root_node, enable_pinstr_nodes and enable_comment_nodes
+ * Any exception is reported with [prerr_error] and recorded in
+ * [error_happened]; it is not re-raised.  The output channel is closed
+ * even if writing fails.
+ *)
+let compile in_filename out_filename print super_root pis comments =
+  let spec =
+    let e = new element_impl default_extension in
+    make_spec_from_mapping
+      ~super_root_exemplar: e
+      ~default_pinstr_exemplar: e
+      ~comment_exemplar: e
+      ~data_exemplar: (new data_impl default_extension)
+      ~default_element_exemplar: e
+      ~element_mapping: (Hashtbl.create 1)
+      ()
+  in
+  let config =
+    { default_config with
+        encoding = `Enc_utf8;
+        warner = new warner;
+        enable_super_root_node = super_root;
+        enable_pinstr_nodes = pis;
+        enable_comment_nodes = comments;
+    }
+  in
+  try
+    let tree =
+      parse_document_entity
+        config
+        (from_file in_filename)
+        spec
+    in
+
+    let ch = open_out out_filename in
+    begin try
+      Pxp_codewriter.write_document ch tree;
+      output_string ch "(create_document (new Pxp_types.drop_warnings) Pxp_yacc.default_spec) # write (Pxp_types.Out_channel stdout) `Enc_utf8;;\n"
+    with
+        e ->
+          (* Do not leak the channel when writing fails; the original
+           * exception is the one that gets reported. *)
+          (try close_out ch with _ -> ());
+          raise e
+    end;
+    close_out ch;
+
+    if print then
+      tree # write (Out_channel stdout) `Enc_utf8;
+  with
+      e ->
+        error_happened := true;
+        prerr_error e
+;;
+
+
+(* Entry point: interprets the command line, insists on both -in and -out
+ * (otherwise a message is printed on stderr and the program exits with
+ * code 1), and then calls [compile].  Anonymous arguments are rejected.
+ *)
+let main() =
+  let in_file = ref ""
+  and out_file = ref "" in
+  let print_file = ref false
+  and super_root = ref false
+  and pis = ref false
+  and comments = ref false in
+  let options =
+    [ "-in", (Arg.String (fun s -> in_file := s)),
+      " <file> Set the XML file to read";
+      "-out", (Arg.String (fun s -> out_file := s)),
+      " <file> Set the Ocaml file to write";
+      "-print", (Arg.Set print_file),
+      " Print the XML file in standard form";
+      "-super-root", Arg.Set super_root,
+      " Generate a super root node";
+      "-pis", Arg.Set pis,
+      " Generate wrapper nodes for processing instructions";
+      "-comments", Arg.Set comments,
+      " Generate nodes for comments";
+    ]
+  in
+  let reject_anonymous _ = raise (Arg.Bad "Unexpected argument") in
+  Arg.parse
+    options
+    reject_anonymous
+    "
+usage: compile [ options ]
+
+List of options:";
+  (* aborts the program if a mandatory file name was not given *)
+  let require value message =
+    if value = "" then begin
+      prerr_endline message;
+      exit 1
+    end
+  in
+  require !in_file "No input file specified.";
+  require !out_file "No output file specified.";
+  compile !in_file !out_file !print_file !super_root !pis !comments
+;;
+
+
+(* Run the program; the exit code is 1 if compile reported an error
+ * (see error_happened). *)
+main();
+if !error_happened then exit(1);;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.4 2000/08/17 01:20:15 gerd
+ * Update: Also tested whether super root nodes, pinstr nodes
+ * and comment nodes work.
+ * Note: comment nodes are not fully tested yet.
+ *
+ * Revision 1.3 2000/08/16 23:44:19 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.2 2000/07/16 17:54:15 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.1 2000/07/09 00:33:32 gerd
+ * Initial revision.
+ *
+ *)
--- /dev/null
+#! /bin/sh
+
+./test_codewriter sample001.xml
--- /dev/null
+<!DOCTYPE a [
+
+<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
+<!ELEMENT b (#PCDATA | a)*>
+<!ELEMENT c EMPTY>
+<!ELEMENT d ANY>
+<!ELEMENT e EMPTY>
+<!ELEMENT f EMPTY>
+<!ELEMENT g EMPTY>
+
+<!ATTLIST a u CDATA #IMPLIED
+ v NMTOKEN "huhu"
+ w (q|p) #REQUIRED
+ x NOTATION (n1|n2) "n1"
+ y ENTITY #IMPLIED>
+
+<!NOTATION n1 SYSTEM "/bin/n1-processor">
+<!NOTATION n2 SYSTEM "/bin/n2-processor">
+
+<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
+<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
+
+<!-- comment 1 -->
+<?pi1 args ...?>
+]>
+
+<!-- comment 2 -->
+<a u="1" w="q" x="n2">
+ <!-- comment 3 -->
+ <b>
+ <?pi2 args ...?>
+ This is text!
+ <a w="p" y="u1">
+ <c/>
+ <d/>
+ </a>
+ </b>
+ <!-- comment 4 -->
+</a>
+
+<!-- comment 5 -->
+<?pi3 args ...?>
+<!-- comment 6 -->
+
--- /dev/null
+#! /bin/sh
+
+set -e
+
+sample="$1"
+echo "Testing $sample:"
+./compile -in "$sample" -out "sample.ml" -print -super-root -pis -comments >"out1"
+echo "- code written to sample.ml, formatted data to out1"
+OCAMLPATH=../.. ocamlfind ocamlc -package . -linkpkg -custom sample.ml -o sample
+echo "- sample.ml compiled to sample"
+./sample >out2
+echo "- re-read data written to out2"
+if cmp out1 out2; then
+ echo "- out1 and out2 are identical! OK"
+else
+ echo "- out1 and out2 differ! FAILURE!"
+ exit 1
+fi
--- /dev/null
+
+
+(* Reads the file [name] in binary mode and prints a LaTeX minipage to
+ * stdout that shows its contents byte by byte:
+ * - TAB, LF and CR are shown as the slanted labels HT, LF and CR; a line
+ *   break follows LF, and follows CR unless the next byte is LF
+ * - the other bytes below 32 and the bytes from 127 on are shown as
+ *   slanted two-digit hex codes
+ * - the space and a fixed set of punctuation characters are printed with
+ *   the symbol command and their decimal code
+ * - every other character is printed as it is
+ *)
+let dump_file name =
+  let contents =
+    let ch = open_in_bin name in
+    let n = in_channel_length ch in
+    let buf = String.create n in
+    really_input ch buf 0 n;
+    close_in ch;
+    buf
+  in
+  let len = String.length contents in
+  (* true if the byte after position i exists and is LF *)
+  let followed_by_lf i = i < len - 1 && contents.[i+1] = '\010' in
+  let emit i =
+    match contents.[i] with
+        '\009' ->
+          Printf.printf "{\\sl HT}\\linebreak[3]"
+      | '\010' ->
+          Printf.printf "{\\sl LF}\\\\\n"
+      | '\013' ->
+          Printf.printf "{\\sl CR}";
+          if not (followed_by_lf i) then
+            Printf.printf "\\\\\n"
+      | ' ' ->
+          Printf.printf "\\symbol{32}\\linebreak[3]"
+      | ('"'|'#'|'$'|'%'|'&'|'-'|'<'|'>'|'['|'\\'|']'|'^'|'_'|'`'|
+         '{'|'|'|'}'|'~') as c ->
+          Printf.printf "\\symbol{%d}\\linebreak[2]" (Char.code c)
+      | ('\000'..'\008'|'\011'|'\012'|'\014'..'\031'|'\127'..'\255') as c ->
+          Printf.printf "{\\sl (%02x)}\\linebreak[2]" (Char.code c)
+      | c ->
+          print_char c;
+          print_string "\\linebreak[0]"
+  in
+
+  Printf.printf "\\noindent\\begin{minipage}{5.5cm}\n";
+  (* Printf.printf "\\rule{5.5cm}{1pt}\n"; *)
+  Printf.printf "\\footnotesize\\bf File %s:\\\\\n" name;
+  Printf.printf "\\tt{}";
+
+  for i = 0 to len - 1 do
+    emit i
+  done;
+
+  Printf.printf "\\mbox{}\\\\\n";
+  Printf.printf "\\rule{5.5cm}{1pt}\n";
+  Printf.printf "\\end{minipage}\n"
+;;
+
+
+(* Program body: prints a two-column LaTeX document that contains one
+ * minipage (see dump_file) for every file named on the command line. *)
+List.iter print_endline
+  [ "\\documentclass[a4paper]{article}";
+    "\\usepackage{multicol}";
+    "\\begin{document}";
+    "\\begin{multicols}{2}";
+  ];
+for i = 1 to Array.length(Sys.argv)-1 do
+  dump_file Sys.argv.(i)
+done;
+List.iter print_endline
+  [ "\\end{multicols}";
+    "\\end{document}";
+  ]
+;;
+
+
+
--- /dev/null
+# make test_negative: make bytecode executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_negative: test_negative.ml
+ ocamlfind ocamlc -custom -o test_negative -package .,str -linkpkg test_negative.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa current.out
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f test_negative
+
+
--- /dev/null
+----------------------------------------------------------------------
+Regression test "negative":
+----------------------------------------------------------------------
+
+- An erroneous XML file is parsed, and the error message is printed.
+
+- The output is compared with a reference file. The test is only
+ passed if the output and the reference are equal.
+
+- Test data "data_jclark_notwf":
+ Contains the samples by James Clark that are not well-formed.
+ The subdirectories:
+ - sa: standalone documents
+ - not-sa: non-standalone document (with external DTD)
+ - ext-sa: non-standalone document (with other external entity)
+
+- Test data "data_jclark_invalid":
+ Contains the samples by James Clark that are invalid.
+
+- Tests that are not passed have been moved into the *-problems directories.
+ The typical reason is that they use characters outside the Latin 1
+ character set.
+
+- Test data "data_notwf":
+ Contains own tests with samples that are not well-formed.
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/001.xml", at line 5, position 3:
+ERROR (Validity constraint): The root element is `b' but is declared as `a
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ANY>
+]>
+<b>x</b>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/010.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `id' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED>
+]>
+
+<el id="100"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/011.xml", at line 10, position 17:
+ERROR (Validity constraint): ID not unique
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED>
+]>
+
+<any>
+ <el id="x100"/>
+ <el id="x100"/>
+</any>
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/012.xml", at line 6, position 1:
+ERROR (Validity constraint): More than one ID attribute for element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id1 ID #IMPLIED>
+<!ATTLIST el id2 ID #IMPLIED>
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/013.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID "a">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/014.xml", at line 5, position 1:
+ERROR (Validity constraint): ID attribute must be #IMPLIED or #REQUIRED; element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #FIXED "a">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/015.xml", at line 7, position 17:
+ERROR (Validity constraint): Attribute `idref' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idref IDREF #IMPLIED>
+]>
+
+<el idref="100"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/016.xml", at line 7, position 22:
+ERROR (Validity constraint): Attribute `idrefs' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idrefs IDREFS #IMPLIED>
+]>
+
+<el idrefs="100 200"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/017.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idref' of element `el' refers to unknown ID `a20'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED
+ idref IDREF #IMPLIED
+>
+]>
+
+<any>
+ <el id="a10"/>
+ <el idref="a20"/>
+</any>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/018.xml" at line 12, position 2:
+ERROR (Validity constraint): Attribute `idrefs' of element `el' refers to unknown ID `a20'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el id ID #IMPLIED
+ idrefs IDREFS #IMPLIED
+>
+]>
+
+<any>
+ <el id="a10"/>
+ <el idrefs="a10 a20"/>
+</any>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/019.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY #IMPLIED>
+]>
+
+<el ent="10"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/020.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES #IMPLIED>
+]>
+
+<el ents="a 10"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/021.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY #IMPLIED>
+]>
+
+<el ent="x"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/022.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES #IMPLIED>
+]>
+
+<el ents="ndata a"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/023.xml", at line 6, position 13:
+ERROR (Validity constraint): Attribute `nm' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nm NMTOKEN #IMPLIED>
+]>
+<el nm="[]"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/024.xml", at line 6, position 17:
+ERROR (Validity constraint): Attribute `nms' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nms NMTOKENS #IMPLIED>
+]>
+<el nms="10 []"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/025.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idref' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idref IDREF "100">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/026.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `idrefs' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el idrefs IDREFS "100 200">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/027.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ent ENTITY "10">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/028.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `x'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ENTITY ndata SYSTEM "" NDATA x>
+<!ELEMENT el EMPTY>
+<!ATTLIST el ents ENTITIES "a 10">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/029.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nm' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nm NMTOKEN "[]">
+]>
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/030.xml", at line 5, position 1:
+ERROR (Validity constraint): Default value for attribute `nms' is lexically malformed
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el nms NMTOKENS "10 []">
+]>
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/031.xml", at line 6, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `jpeg'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif|jpeg) #IMPLIED>
+]>
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/032.xml", at line 6, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `n' in declaration for element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) "jpeg">
+]>
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/033.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `n' does not match one of the declared notation names
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) #IMPLIED>
+]>
+<el n="jpeg"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/034.xml", at line 8, position 1:
+ERROR (Validity constraint): More than one NOTATION attribute for element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!NOTATION gif PUBLIC "image/gif">
+<!ELEMENT el EMPTY>
+<!ATTLIST el n NOTATION (gif) #IMPLIED
+ m NOTATION (gif) #IMPLIED
+>
+]>
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/035.xml", at line 5, position 1:
+ERROR (Validity constraint): Illegal default value for attribute `enum' in declaration for element `el'
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el enum (a|b|c) "d">
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/036.xml", at line 7, position 14:
+ERROR (Validity constraint): Attribute `enum' does not match one of the declared enumerator tokens
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el enum (a|b|c) #IMPLIED>
+]>
+
+<el enum="d"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/037.xml", at line 7, position 5:
+ERROR (Validity constraint): Required attribute `x' is missing
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el x CDATA #REQUIRED>
+]>
+
+<el/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/038.xml", at line 7, position 13:
+ERROR (Validity constraint): Attribute `x' is fixed, but has here a different value
--- /dev/null
+<?xml version="1.0"?>
+<!DOCTYPE el [
+<!ELEMENT el EMPTY>
+<!ATTLIST el x CDATA #FIXED "abc">
+]>
+
+<el x="def"/>
--- /dev/null
+<!ATTLIST el v3 CDATA "ghi">
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/060.xml", at line 17, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "060.ent">
+%declare_v3;
+]>
+
+<any>
+ <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+ <any><el v2="DEF" v3="GHI"/></any>
+ <any><el v3="GHI"/></any>
+ <any><el/></any>
+</any>
+
--- /dev/null
+<!ENTITY % declare_v3 '<!ATTLIST el v3 CDATA "ghi">'>
+
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/061.xml", at line 18, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+<!ENTITY % declare_declare_v3 SYSTEM "061.ent">
+%declare_declare_v3;
+%declare_v3;
+]>
+
+<any>
+ <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+ <any><el v2="DEF" v3="GHI"/></any>
+ <any><el v3="GHI"/></any>
+ <any><el/></any>
+</any>
+
--- /dev/null
+<!ATTLIST el v3 CDATA "ghi">
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/062.xml", at line 15, position 12:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any SYSTEM "062.ent" [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 CDATA "abc">
+<!ENTITY % declare_v2 '<!ATTLIST el v2 CDATA "def">'>
+%declare_v2;
+]>
+
+<any>
+ <any><el v1="ABC" v2="DEF" v3="GHI"/></any>
+ <any><el v2="DEF" v3="GHI"/></any>
+ <any><el v3="GHI"/></any>
+ <any><el/></any>
+</any>
+
--- /dev/null
+<!ENTITY e3 "ghi">
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/063.xml", at line 15, position 2:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "063.ent">
+%declare_e3;
+]>
+
+<any>
+ &e1;
+ &e2;
+ &e3;
+</any>
+
--- /dev/null
+<!ENTITY e3 "ghi">
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/064.xml", at line 17, position 10:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "064.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att CDATA #IMPLIED>
+]>
+
+<any>
+ <el att="&e1;"/>
+ <el att="&e2;"/>
+ <el att="&e3;"/>
+</any>
+
--- /dev/null
+<!ENTITY e3 "ghi">
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/065.xml", at line 13, position 24:
+ERROR (Validity constraint): Reference to entity `e3' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 "abc">
+<!ENTITY % declare_e2 '<!ENTITY e2 "def">'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "065.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att1 CDATA "&e1;"
+ att2 CDATA "&e2;"
+ att3 CDATA "&e3;"
+>
+]>
+
+<any>
+ <el att1="1" att2="2" att3="3"/>
+ <el att2="2" att3="3"/>
+ <el att3="3"/>
+ <el/>
+</any>
+
--- /dev/null
+<!ENTITY e3 SYSTEM "ghi" NDATA n3>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/066.xml", at line 13, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 SYSTEM "abc" NDATA n1>
+<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "066.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att ENTITY #IMPLIED
+>
+]>
+
+<any>
+ <any><el att="e1"/></any>
+ <any><el att="e2"/></any>
+ <any><el att="e3"/></any>
+</any>
+
--- /dev/null
+<!ENTITY e3 SYSTEM "ghi" NDATA n3>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/067.xml", at line 15, position 1:
+ERROR (Validity constraint): Reference to undeclared notation `n3'
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ENTITY e1 SYSTEM "abc" NDATA n1>
+<!ENTITY % declare_e2 '<!ENTITY e2 SYSTEM "def" NDATA n2>'>
+%declare_e2;
+<!ENTITY % declare_e3 SYSTEM "067.ent">
+%declare_e3;
+<!ELEMENT el EMPTY>
+<!ATTLIST el att1 ENTITY "e1"
+ att2 ENTITY "e2"
+ att3 ENTITY "e3"
+>
+]>
+
+<any>
+ <any><el att1="e1" att2="e1" att3="e1"/></any>
+ <any><el att2="e1" att3="e1"/></any>
+ <any><el att3="e1"/></any>
+ <any><el/></any>
+</any>
+
--- /dev/null
+<!ATTLIST el v3 NMTOKEN #IMPLIED>
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/068.xml", at line 19, position 23:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 NMTOKEN #IMPLIED>
+<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKEN #IMPLIED>'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "068.ent">
+%declare_v3;
+]>
+
+<any>
+ <any><el v1="abc"/></any>
+ <any><el v2="abc"/></any>
+ <any><el v3="abc"/></any>
+ <any><el v1=" abc "/></any>
+ <any><el v2=" abc "/></any>
+ <any><el v3=" abc "/></any>
+</any>
+
--- /dev/null
+<!ATTLIST el v3 NMTOKENS #IMPLIED>
--- /dev/null
+WARNING: More than one ATTLIST declaration for element type `el'
+WARNING: More than one ATTLIST declaration for element type `el'
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/069.xml", at line 19, position 27:
+ERROR (Validity constraint): Attribute `v3' of element type `el' violates standalone declaration
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT el EMPTY>
+<!ATTLIST el v1 NMTOKENS #IMPLIED>
+<!ENTITY % declare_v2 '<!ATTLIST el v2 NMTOKENS #IMPLIED>'>
+%declare_v2;
+<!ENTITY % declare_v3 SYSTEM "069.ent">
+%declare_v3;
+]>
+
+<any>
+ <any><el v1="abc def"/></any>
+ <any><el v2="abc def"/></any>
+ <any><el v3="abc def"/></any>
+ <any><el v1=" abc def "/></any>
+ <any><el v2=" abc def "/></any>
+ <any><el v3=" abc def "/></any>
+</any>
+
--- /dev/null
+<!ELEMENT outer3 (inner)>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/070.xml", at line 19, position 32:
+ERROR (Validity constraint): Element `outer3' violates standalone declaration because extra white space separates the sub elements
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+
+<!DOCTYPE any [
+<!ELEMENT any ANY>
+<!ELEMENT inner EMPTY>
+<!ELEMENT outer1 (inner)>
+<!ENTITY % declare_outer2 '<!ELEMENT outer2 (inner)>'>
+%declare_outer2;
+<!ENTITY % declare_outer3 SYSTEM "070.ent">
+%declare_outer3;
+]>
+
+<any>
+ <any><outer1><inner/></outer1></any>
+ <any><outer2><inner/></outer2></any>
+ <any><outer3><inner/></outer3></any>
+ <any><outer1><inner/> </outer1></any>
+ <any><outer2><inner/> </outer2></any>
+ <any><outer3><inner/> </outer3></any>
+</any>
+
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/080.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ((a,b)|a+)>]>
+<a/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_invalid/081.xml", at line 4, position 0:
+ERROR (Validity constraint): The content model of element `b' is not deterministic
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a ANY>
+<!ELEMENT b ((b|a+),a)>]>
+<a/>
--- /dev/null
+----------------------------------------
+Root element
+----------------------------------------
+
+001.xml Declared root element type matches actual root element type
+
+----------------------------------------
+Attributes
+----------------------------------------
+
+010.xml ID attributes must match the Name production (not nmtoken)
+011.xml ID attributes uniquely identify the element bearing them
+ *** TODO ***
+012.xml An element type must not have more than one ID attribute declared
+013.xml An ID attribute must not have a default
+014.xml An ID attribute must not have a default (FIXED)
+015.xml Attributes of type IDREF must match the Name production
+016.xml Attributes of type IDREFS must match the Names production
+017.xml Attributes of type IDREF must match the value of an ID
+ attribute
+ *** TODO ***
+018.xml Attributes of type IDREFS must match the values of ID
+ attributes
+ *** TODO ***
+019.xml Attributes of type ENTITY must match the Name production
+020.xml Attributes of type ENTITIES must match the Names production
+021.xml Attributes of type ENTITY must match an unparsed entity
+022.xml Attributes of type ENTITIES must match unparsed entities
+023.xml Attributes of type NMTOKEN must match the nmtoken production
+024.xml Attributes of type NMTOKENS must match the nmtokens production
+025.xml like 015.xml, but the default value is tested
+026.xml like 016.xml, but the default value is tested
+027.xml like 019.xml, but the default value is tested
+028.xml like 020.xml, but the default value is tested
+029.xml like 023.xml, but the default value is tested
+030.xml like 024.xml, but the default value is tested
+031.xml all notation names in the declaration must have been declared
+032.xml Values of NOTATION type must match one declared value
+033.xml Values of NOTATION type must match one declared value
+034.xml Only one NOTATION attribute per element
+035.xml Values of enum type must match one of the declared values
+036.xml Values of enum type must match one of the declared values
+037.xml missing #REQUIRED attribute
+038.xml #FIXED attributes must match the declared default
+
+----------------------------------------
+Standalone declaration
+----------------------------------------
+
+060.xml Externally declared default values are rejected
+061.xml variant of 060.xml (internal entity within external entity)
+062.xml variant of 060.xml (external subset of DTD)
+063.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in main text)
+064.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in attribute value)
+065.xml Externally declared parsed general entities are rejected
+ (entity ref occurs in attribute default)
+ *** THINK ABOUT THIS CASE AGAIN ***
+066.xml Externally declared unparsed entities are rejected
+ (entity ref occurs in attribute value)
+067.xml Externally declared unparsed entities are rejected
+ (entity ref occurs in attribute default)
+068.xml Externally declared NMTOKEN attributes require normal form
+069.xml Externally declared NMTOKENS attributes require normal form
+070.xml Externally declared elements with regexp content model
+ must not contain extra white space between sub elements
+
+----------------------------------------
+Deterministic content models
+----------------------------------------
+
+080.xml Non-deterministic content model: ((a,b)|a+)
+081.xml Non-deterministic content model: ((b|a+),a)
+
--- /dev/null
+<!ELEMENT doc EMPTY>
+<!ENTITY % e "<!--">
+%e; -->
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 3:
+ERROR (Well-formedness constraint): `-->' expected
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "(#PCDATA">
+<!ELEMENT doc %e;)>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/002.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "002.ent", at line 2, position 18:
+ERROR (Validity constraint): Entities not properly nested with parentheses
--- /dev/null
+<!DOCTYPE doc SYSTEM "002.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "<!ELEMENT ">
+%e; doc (#PCDATA)>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/003.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "003.ent", at line 2, position 17:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
--- /dev/null
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e1 "<!ELEMENT ">
+<!ENTITY % e2 ">">
+%e1; doc (#PCDATA) %e2;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/004.xml", at line 1, position 30:
+In entity e2, at line 1, position 1:
+Called from entity [dtd] = SYSTEM "004.ent", line 3, position 19:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
--- /dev/null
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e ">">
+<!ELEMENT doc (#PCDATA) %e;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/005.xml", at line 1, position 30:
+In entity e, at line 1, position 1:
+Called from entity [dtd] = SYSTEM "005.ent", line 2, position 24:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
--- /dev/null
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
--- /dev/null
+<!ENTITY % e "(#PCDATA)>">
+<!ELEMENT doc %e;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_invalid/006.xml", at line 1, position 30:
+In entity e, at line 1, position 10:
+Called from entity [dtd] = SYSTEM "006.ent", line 2, position 14:
+ERROR (Validity constraint): Entities not properly nested with ELEMENT declaration
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
--- /dev/null
+&e;
\ No newline at end of file
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/001.xml", at line 3, position 1:
+ERROR (Validity constraint): The root element is not declared
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "001.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+data
+
--- /dev/null
+In entity e = SYSTEM "002.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/002.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "002.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<?xml version="1.0"?><?xml version="1.0"?>
+data
--- /dev/null
+In entity e = SYSTEM "003.ent", at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/ext-sa/003.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e SYSTEM "003.ent">
+]>
+<doc>&e;</doc>
--- /dev/null
+<![ INCLUDE [
+<!ELEMENT doc (#PCDATA)>
+]>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/001.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "001.ent", at line 3, position 0:
+ERROR (Well-formedness constraint): `>]>' expected
--- /dev/null
+<!DOCTYPE doc SYSTEM "001.ent">
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 1:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/002.xml", line 4, position 0:
+ERROR (Well-formedness constraint): `]' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "<?xml version='1.0' encoding='UTF-8'?>">
+%e;
+]>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![ IGNORE [
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/003.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "003.ent", at line 2, position 11:
+ERROR (Well-formedness constraint): Bad conditional section
--- /dev/null
+<!DOCTYPE doc SYSTEM "003.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+<![ INCLUDE [
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/004.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "004.ent", at line 3, position 0:
+ERROR (Well-formedness constraint): `>]>' expected
--- /dev/null
+<!DOCTYPE doc SYSTEM "004.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
+%e;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/005.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "005.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Reference to undeclared parameter entity `e'
--- /dev/null
+<!DOCTYPE doc SYSTEM "005.ent">
+<doc></doc>
--- /dev/null
+<![INCLUDE
+<!ELEMENT doc (#PCDATA)>
+]]>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/006.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "006.ent", at line 2, position 0:
+ERROR (Well-formedness constraint): Bad conditional section
--- /dev/null
+<!DOCTYPE doc SYSTEM "006.ent">
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/007.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "007.ent", at line 1, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<!DOCTYPE doc SYSTEM "007.ent">
+<doc></doc>
--- /dev/null
+<!ELEMENT doc ANY>
+<!ENTITY e "100%">
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/not-sa/008.xml", at line 1, position 30:
+In entity [dtd] = SYSTEM "008.ent", at line 2, position 17:
+ERROR (Well-formedness constraint): The character '%' must be written as '&#37;'
--- /dev/null
+<!DOCTYPE doc SYSTEM "008.ent">
+<doc></doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<゚></゚>">
+]>
+<doc>&e;</doc>
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<X๜></X๜>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/001.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<doc>
+<doc
+?
+<a</a>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/002.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
--- /dev/null
+<doc>
+<.doc></.doc>
+</doc>
+
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/003.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal token or character
--- /dev/null
+<doc><? ?></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/004.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
--- /dev/null
+<doc><?target some data></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/005.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
--- /dev/null
+<doc><?target some data?</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/006.xml", at line 1, position 20:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
--- /dev/null
+<doc><!-- a comment -- another --></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/007.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<doc>& no refc</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/008.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<doc>&.entity;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/009.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<doc>&#RE;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/010.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<doc>A & B</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/011.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Bad attribute list
--- /dev/null
+<doc a1></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/012.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
--- /dev/null
+<doc a1=v1></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/013.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<doc a1="v1'></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/014.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Attribute value contains character '<' literally
--- /dev/null
+<doc a1="<foo>"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/015.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): Bad attribute list
--- /dev/null
+<doc a1=></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/016.xml", at line 1, position 13:
+ERROR (Well-formedness constraint): `>' or `/>' expected
--- /dev/null
+<doc a1="v1" "v2"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/017.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<doc><![CDATA[</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/018.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<doc><![CDATA [ stuff]]></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/019.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/020.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<doc a1="A & B"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/021.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<doc a1="a&b"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/022.xml", at line 1, position 8:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<doc a1="{:"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/023.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<doc 12="34"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/024.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
--- /dev/null
+<doc>
+<123></123>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/025.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
--- /dev/null
+<doc>]]></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/026.xml", at line 1, position 6:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
--- /dev/null
+<doc>]]]></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/027.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): `-->' expected
--- /dev/null
+<doc>
+<!-- abc
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/028.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Illegal processing instruction
--- /dev/null
+<doc>
+<?a pi that is not closed
+</doc>
+
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/029.xml", at line 1, position 9:
+ERROR (Well-formedness constraint): The sequence ']]>' must be written as ']]>'
--- /dev/null
+<doc>abc]]]>def</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/030.xml", at line 1, position 18:
+ERROR: Bad character stream
--- /dev/null
+<doc>A form feed (\f) is not legal in data</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/031.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Illegal processing instruction
--- /dev/null
+<doc><?pi a form feed (\f) is not allowed in a pi?></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/032.xml", at line 1, position 23:
+ERROR: Bad character stream
--- /dev/null
+<doc><!-- a form feed (\f) is not allowed in a comment --></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/033.xml", at line 1, position 8:
+ERROR: Bad character stream
--- /dev/null
+<doc>abc\edef</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/034.xml", at line 1, position 4:
+ERROR: Bad character stream
--- /dev/null
+<doc\f>A form-feed is not white space or a name character</doc\f>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/035.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '<'
--- /dev/null
+<doc>1 < 2 but not in XML</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/036.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
--- /dev/null
+<doc></doc>
+Illegal data
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/037.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Character reference not allowed here
--- /dev/null
+<doc></doc>
+ 
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/038.xml", at line 1, position 29:
+ERROR (Well-formedness constraint): Attribute `x' occurs twice in element `doc'
--- /dev/null
+<doc x="foo" y="bar" x="baz"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/039.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
--- /dev/null
+<doc><a></aa></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/040.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
--- /dev/null
+<doc></doc>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/041.xml", at line 2, position 5:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
--- /dev/null
+<doc/>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/042.xml", at line 1, position 11:
+SYNTAX ERROR
--- /dev/null
+<doc/></doc/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/043.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Data not allowed here
--- /dev/null
+<doc/>
+Illegal data
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/044.xml", at line 1, position 12:
+ERROR (Well-formedness constraint): Document must consist of only one toplevel element
--- /dev/null
+<doc/><doc/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/045.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<doc>
+<a/
+</doc>
+
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/046.xml", at line 2, position 2:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<doc>
+<a/</a>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/047.xml", at line 2, position 3:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<doc>
+<a / >
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/048.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): CDATA section not allowed here
--- /dev/null
+<doc>
+</doc>
+<![CDATA[]]>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/049.xml", at line 3, position 15:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
--- /dev/null
+<doc>
+<a><![CDATA[xyz]]]></a>
+<![CDATA[]]></a>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/050.xml", at line 1, position 0:
+SYNTAX ERROR
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/051.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<!-- a comment -->
+<![CDATA[]]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/052.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
--- /dev/null
+<!-- a comment -->
+ 
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/053.xml", at line 1, position 10:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
--- /dev/null
+<doc></DOC>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/054.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY foo PUBLIC "some public id">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/055.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Illegal token or character
--- /dev/null
+<!DOCTYPE doc [
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/056.xml", at line 1, position 14:
+ERROR (Well-formedness constraint): Content not allowed here
--- /dev/null
+<!DOCTYPE doc -- a comment -- []>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/057.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "whatever" -- a comment -->
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/058.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): `|' and more names expected, or `)'
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo,bar) #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/059.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): #REQUIRED, #IMPLIED, #FIXED or a string literal expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NMTOKEN v1>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/060.xml", at line 3, position 21:
+ERROR (Well-formedness constraint): One of CDATA, ID, IDREF, IDREFS, ENTITY, ENTITIES, NMTOKEN, NMTOKENS, NOTATION, or a subexpression expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NAME #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/061.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): Whitespace is missing between the literals of the PUBLIC identifier
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e PUBLIC "whatever""e.ent">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/062.xml", at line 2, position 12:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY foo"some text">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/063.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
--- /dev/null
+<!DOCTYPE doc [
+<![INCLUDE[ ]]>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/064.xml", at line 3, position 20:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST e a1 CDATA"foo">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/065.xml", at line 3, position 16:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1(foo|bar) #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/066.xml", at line 3, position 26:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo|bar)#IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/067.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 (foo)"foo">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/068.xml", at line 3, position 25:
+ERROR (Well-formedness constraint): Error in NOTATION type (perhaps missing whitespace after NOTATION?)
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a1 NOTATION(foo) #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/069.xml", at line 4, position 38:
+ERROR (Well-formedness constraint): Whitespace missing before `NDATA'
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION eps SYSTEM "eps.exe">
+<!-- missing space before NDATA -->
+<!ENTITY foo SYSTEM "foo.eps"NDATA eps>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/070.xml", at line 1, position 40:
+ERROR (Well-formedness constraint): Double hyphens are illegal inside comments
--- /dev/null
+<!-- a comment ending with three dashes --->
+<doc></doc>
--- /dev/null
+In entity e3, at line 1, position 0:
+Called from entity e2, line 1, position 0:
+Called from entity e1, line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/071.xml", line 6, position 5:
+ERROR (Validity constraint): Recursive reference to entity `e1'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+]>
+<doc>&e1;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/072.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
--- /dev/null
+<doc>&foo;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/073.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `f'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "whatever">
+]>
+<doc>&f;</doc>
--- /dev/null
+In entity e, at line 1, position 5:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/074.xml", line 5, position 5:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "</foo><foo>">
+]>
+<doc>
+<foo>&e;</foo>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/075.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+]>
+<doc a="&e1;"></doc>
+
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/076.xml", at line 1, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
--- /dev/null
+<doc a="&foo;"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/077.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `bar'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY foo "&bar;">
+]>
+<doc a="&foo;"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/078.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `foo'
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&foo;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/079.xml", at line 6, position 22:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e1;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/080.xml", at line 6, position 29:
+ERROR (Well-formedness constraint): Recursive reference to general entity `e1'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e1 "&e2;">
+<!ENTITY e2 "&e3;">
+<!ENTITY e3 "&e1;">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #FIXED "&e1;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/081.xml", at line 4, position 7:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul">
+]>
+<doc a="&e;"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/082.xml", at line 4, position 22:
+Other exception: Sys_error("/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/nul: No such file or directory")
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/083.xml", at line 4, position 5:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul" NDATA n>
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/084.xml", at line 4, position 22:
+ERROR (Validity constraint): Invalid reference to NDATA entity e
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "nul" NDATA n>
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/085.xml", at line 1, position 25:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
--- /dev/null
+<!DOCTYPE doc PUBLIC "[" "null.ent">
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/086.xml", at line 2, position 24:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY foo PUBLIC "[" "null.xml">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/087.xml", at line 2, position 36:
+ERROR (Well-formedness constraint): Illegal character in PUBLIC identifier
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION foo PUBLIC "[" "null.ent">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/088.xml", at line 6, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+<!ENTITY e '"'>
+]>
+<doc a="&e;></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/089.xml", at line 2, position 32:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % foo SYSTEM "foo.xml" NDATA bar>
+]>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/090.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Attribute value contains character '<' literally
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<foo a='<'></foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/091.xml", at line 3, position 32:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!NOTATION n SYSTEM "n">
+<!ENTITY % foo SYSTEM "foo.xml" NDATA n>
+]>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 7:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/092.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<foo a='&'></foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/093.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
--- /dev/null
+<doc>X</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/094.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<?xml VERSION="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/095.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<?xml encoding="UTF-8" version="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/096.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<?xml version="1.0"encoding="UTF-8" ?>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/097.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<?xml version="1.0' encoding="UTF-8" ?>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/098.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<?xml version="1.0" version="1.0"?>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/099.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<?xml version="1.0" valid="no" ?>
+<doc></doc>
\ No newline at end of file
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/100.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Illegal 'standalone' declaration
--- /dev/null
+<?xml version="1.0" standalone="YES" ?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/101.xml", at line 1, position 0:
+Other exception: Failure("Netconversion.encoding_of_string: unknown encoding")
--- /dev/null
+<?xml version="1.0" encoding=" UTF-8"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/102.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML version string
--- /dev/null
+<?xml version="1.0 " ?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/103.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag does not match start-tag
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/104.xml", at line 4, position 13:
+ERROR (Well-formedness constraint): End-tag not in the same entity as the start-tag
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<foo>">
+]>
+<doc>&e;</foo></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/105.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<?pi stuff?>
+<![CDATA[]]>
+<doc>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/106.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
--- /dev/null
+<?pi data?>
+ <doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/107.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Restriction of the internal subset: Conditional sections not allowed
--- /dev/null
+<!DOCTYPE doc [
+<![CDATA[]]>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/108.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<doc>
+<![CDATA [ ]]>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/109.xml", at line 4, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<doc></doc>">
+]>
+&e;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/110.xml", at line 5, position 3:
+ERROR (Well-formedness constraint): Entity reference not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "">
+]>
+<doc></doc>
+&e;
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/111.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "foo='bar'">
+]>
+<doc &e;></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/112.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<doc>
+<![cdata[data]]>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/113.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % foo "&">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/114.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY foo "&">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/115.xml", at line 4, position 7:
+ERROR (Well-formedness constraint): The character '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "&">
+]>
+<doc a="&e;"></doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/116.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "&#9">
+]>
+<doc>&e;7;</doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/117.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "&">
+]>
+<doc>&e;#97;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/118.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "#">
+]>
+<doc>&&e;97;</doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/119.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc>
+&e;#38;
+</doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/120.xml", line 5, position 0:
+ERROR (Well-formedness constraint): The ampersand '&' must be written as '&amp;'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "&#38;">
+]>
+<doc>
+&e;
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/121.xml", at line 2, position 9:
+ERROR (Well-formedness constraint): Illegal token or character
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY #DEFAULT "default">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/122.xml", at line 2, position 27:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a, (b) | c)?>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/123.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc ((doc?)))>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/124.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (doc|#PCDATA)*>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/125.xml", at line 2, position 16:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc ((#PCDATA))>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/126.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)+>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/127.xml", at line 2, position 22:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)?>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/128.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc CDATA>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/129.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): Content model expression expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc - - (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/130.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (doc?) +(foo)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/131.xml", at line 2, position 21:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (doc?) -(foo)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/132.xml", at line 2, position 41:
+ERROR (Well-formedness constraint): It is not allowed to mix alternatives and sequences
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a, (b, c), (d, (e, f) | g))?>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/133.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a *)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/134.xml", at line 2, position 18:
+ERROR (Well-formedness constraint): `>' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a) *>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/135.xml", at line 2, position 17:
+ERROR (Well-formedness constraint): References to general entities not allowed in DTDs
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (a & b)?>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/136.xml", at line 2, position 14:
+ERROR (Well-formedness constraint): EMPTY, ANY, or a subexpression expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc O O (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/137.xml", at line 2, position 13:
+ERROR (Well-formedness constraint): Whitespace is missing
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc(#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/138.xml", at line 2, position 19:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (doc*?)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/139.xml", at line 2, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc ()>
+]>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/140.xml", line 4, position 5:
+ERROR (Well-formedness constraint): The left angle bracket '<' must be written as '&lt;'
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<゚></゚>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity e, at line 1, position 2:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/141.xml", line 4, position 5:
+ERROR (Well-formedness constraint): Illegal inside tags
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<X๜></X๜>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/142.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 0 outside the accepted range of code points
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#0;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/143.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 31 outside the accepted range of code points
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#31;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/144.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 65535 outside the accepted range of code points
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#xFFFF;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/145.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 55296 outside the accepted range of code points
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#xD800;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/146.xml", at line 4, position 5:
+ERROR (Well-formedness constraint): Code point 1114112 outside the accepted range of code points
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&#x110000;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/147.xml", at line 2, position 0:
+SYNTAX ERROR
--- /dev/null
+
+<?xml version="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/148.xml", at line 2, position 0:
+SYNTAX ERROR
--- /dev/null
+<!-- -->
+<?xml version="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/149.xml", at line 3, position 0:
+ERROR (Well-formedness constraint): `]' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<?xml version="1.0"?>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/150.xml", at line 2, position 0:
+SYNTAX ERROR
--- /dev/null
+<doc>
+<?xml version="1.0"?>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/151.xml", at line 3, position 0:
+SYNTAX ERROR
--- /dev/null
+<doc>
+</doc>
+<?xml version="1.0"?>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/152.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Bad XML declaration
--- /dev/null
+<?xml encoding="UTF-8"?>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/153.xml", line 5, position 5:
+SYNTAX ERROR
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<?xml encoding='UTF-8'?>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/154.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
--- /dev/null
+<?XML version="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/155.xml", at line 1, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
--- /dev/null
+<?xmL version="1.0"?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/156.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
--- /dev/null
+<doc>
+<?xMl version="1.0"?>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/157.xml", at line 2, position 0:
+ERROR (Well-formedness constraint): Reserved processing instruction
--- /dev/null
+<doc>
+<?xmL?>
+</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/158.xml", at line 4, position 10:
+ERROR (Well-formedness constraint): Illegal token or character
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!NOTATION gif PUBLIC "image/gif" "">
+<!ATTLIST #NOTATION gif a1 CDATA #IMPLIED>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/159.xml", at line 3, position 38:
+ERROR (Well-formedness constraint): The character '&' must be written as '&amp;'
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY e "<![CDATA[Tim & Michael]]>">
+]>
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/160.xml", at line 4, position 18:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+<!ENTITY foo "%e;">
+]>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 9:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/161.xml", line 3, position 15:
+ERROR (Well-formedness constraint): Bad content model expression
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY % e "#PCDATA">
+<!ELEMENT doc (%e;)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/162.xml", at line 4, position 20:
+ERROR (Well-formedness constraint): Restriction of the internal subset: parameter entity not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e1 "">
+<!ENTITY % e2 "%e1;">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/163.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Content not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+]>
+%e;
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/164.xml", at line 4, position 2:
+ERROR (Well-formedness constraint): References to parameter entities not allowed here
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "">
+] %e; >
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/165.xml", at line 2, position 8:
+ERROR (Well-formedness constraint): Whitespace is missing after ENTITY
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY% e "">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/166.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<doc>ï¿¿</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/167.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<doc>￾</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/168.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<doc>í €</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/169.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<doc>í°€</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/170.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<doc>÷€€€</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/171.xml", at line 1, position 5:
+ERROR: Bad character stream
--- /dev/null
+<!-- ï¿¿ -->
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/172.xml", at line 1, position 0:
+ERROR: Bad character stream
--- /dev/null
+<?pi ï¿¿?>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/173.xml", at line 1, position 7:
+ERROR: Bad character stream
--- /dev/null
+<doc a="ï¿¿"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/174.xml", at line 1, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<doc><![CDATA[ï¿¿]]></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/175.xml", at line 3, position 18:
+ERROR: Bad character stream
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ENTITY % e "ï¿¿">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/176.xml", at line 5, position 0:
+ERROR (Well-formedness constraint): Missing end tag
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/177.xml", at line 4, position 6:
+ERROR: Bad character stream
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>Aï¿¿</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/178.xml", at line 5, position 7:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA #IMPLIED>
+]>
+<doc a=""></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/179.xml", at line 2, position 11:
+ERROR (Well-formedness constraint): Cannot find the second quotation mark
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "">
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/180.xml", at line 3, position 22:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+<!ENTITY e "v">
+]>
+<doc></doc>
--- /dev/null
+In entity e, at line 1, position 0:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/181.xml", line 5, position 5:
+ERROR (Well-formedness constraint): Declaration either malformed or not allowed in this context
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<![CDATA[">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;]]></doc>
--- /dev/null
+In entity e, at line 1, position 4:
+Called from entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/182.xml", line 5, position 5:
+ERROR (Well-formedness constraint): `-->' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e "<!--">
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>&e;--></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/183.xml", at line 2, position 28:
+ERROR (Well-formedness constraint): `)*' expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA | foo*)* >
+<!ELEMENT foo EMPTY>
+]>
+<doc></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/184.xml", at line 2, position 25:
+ERROR (Well-formedness constraint): Name expected
--- /dev/null
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA | (foo))* >
+<!ELEMENT foo EMPTY>
+]>
+<doc></doc>
+
--- /dev/null
+<!ELEMENT doc (#PCDATA)>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/185.xml", at line 3, position 5:
+ERROR (Well-formedness constraint): Reference to undeclared general entity `e'
--- /dev/null
+<?xml version="1.0" standalone="yes"?>
+<!DOCTYPE doc SYSTEM "185.ent">
+<doc>&e;</doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_jclark_notwf/sa/186.xml", at line 5, position 15:
+ERROR (Well-formedness constraint): Whitespace is missing between attributes `b' and `d'
--- /dev/null
+<!DOCTYPE a [
+<!ELEMENT a EMPTY>
+<!ATTLIST a b CDATA #IMPLIED d CDATA #IMPLIED>
+]>
+<a b="c"d="e"/>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/001.xml", at line 4, position 7:
+ERROR (Validity constraint): Found reference to external entity in attribute value
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "null.ent">
+]>
+<doc a="&e;"></doc>
--- /dev/null
+In entity [toplevel] = SYSTEM "file://localhost/home/gerd/ocaml/smcvs/ocamlpkg/markup/rtests/negative/data_notwf/sa/002.xml", at line 4, position 22:
+ERROR (Validity constraint): Found reference to external entity in attribute value
--- /dev/null
+<!DOCTYPE doc [
+<!ENTITY e SYSTEM "null.ent">
+<!ELEMENT doc (#PCDATA)>
+<!ATTLIST doc a CDATA "&e;">
+]>
+<doc></doc>
--- /dev/null
+#! /bin/bash
+
+# $Id$
+
+
+t=./test_negative
+
+init_test () {
+ # $1: Options for test_negative
+ # $2: Path to test record
+ options="$1"
+ input="$2"
+ output=`dirname $input`/`basename $input .xml`.out
+ if [ -f "$output" ]; then
+ echo "Test $input already initialized; skipping"
+ else
+ $t $options "$input" >"$output"
+ echo Test $input initialized.
+ fi
+}
+
+
+check_test () {
+ # $1: Options for test_negative
+ # $2: Path to test record
+ options="$1"
+ input="$2"
+ output=`dirname $input`/`basename $input .xml`.out
+ $t $options "$input" >current.out
+ if [ -f "$output" ]; then
+ if cmp "$output" current.out; then
+ echo Test $input OK
+ else
+ echo Test $input FAILED!!!
+ fi
+ else
+ echo Test $input still uninitialized
+ echo - OUTPUT:
+ cat current.out
+ fi
+}
+
+
+for_directory () {
+ what="$1"
+ shift
+ options="$1"
+ shift
+ while [ $# -gt 0 ]; do
+ input="$1"
+ shift
+ if [ -f "$input" ]; then
+ $what "$options" "$input"
+ else
+ if [ -d "$input" ]; then
+ for ent in $input/*.xml; do
+ for_directory $what "$options" $ent
+ done
+ else
+ echo "Not found: $input" >&2
+ fi
+ fi
+ done
+}
+
+
+# usage
+#   Writes a one-line synopsis to stderr and ends the script with status 1.
+usage () {
+    printf '%s\n' "usage: $0 [ -init -wf ] file ... dir ..." 1>&2
+    exit 1
+}
+
+
+# Command-line switches.  -init selects reference generation instead of
+# checking; each -wf is appended to the options handed to test_negative.
+# Any other switch prints the usage text.  Parsing stops at the first
+# argument that does not start with "-".
+action="check_test"
+options=""
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -init) action="init_test" ;;
+        -wf)   options="$options -wf" ;;
+        -*)    usage ;;
+        *)     break ;;
+    esac
+    shift
+done
+
+
+# Without arguments the standard suites are run: the not-well-formed
+# collections with -wf, then the invalid collections with no extra option.
+# With arguments, exactly those files/directories are processed using the
+# options selected on the command line.
+if [ $# -eq 0 ]; then
+    for_directory $action -wf \
+        data_jclark_notwf/ext-sa \
+        data_jclark_notwf/not-sa \
+        data_jclark_notwf/sa \
+        data_notwf/sa
+    for_directory $action "" \
+        data_jclark_invalid \
+        data_invalid
+else
+    for_directory $action "$options" "$@"
+fi
+
+# ======================================================================
+# $Log$
+# Revision 1.1 2000/11/17 09:57:33 lpadovan
+# Initial revision
+#
+# Revision 1.2 2000/05/01 16:23:39 gerd
+# Added data_invalid.
+#
+# Revision 1.1 2000/05/01 15:58:50 gerd
+# Initial revision.
+#
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+(* Becomes true as soon as processing of any input file raises an exception;
+ * inspected after all files have been handled to choose the exit status.
+ *)
+let error_happened = ref false;;
+
+(* [print_error e] writes the textual form of the exception [e], as produced
+ * by [string_of_exn], to stdout followed by a newline.
+ * The function does not call itself, so it is not declared recursive.
+ *)
+let print_error e =
+  print_endline (string_of_exn e)
+;;
+
+(* Warning sink used in the parser configuration: each warning text is
+ * echoed on stdout, prefixed with "WARNING: ".
+ *)
+class warner =
+  object
+    method warn text =
+      print_endline (String.concat "" ["WARNING: "; text])
+  end
+;;
+
+(* [parse debug wf iso88591 filename] runs the parser once on [filename] and
+ * reports the outcome on stdout.
+ *
+ *   debug    - value for the configuration field [debugging_mode]
+ *   wf       - if true, [parse_wfdocument_entity] is used; otherwise
+ *              [parse_document_entity] together with a fresh [hash_index]
+ *   iso88591 - selects `Enc_iso88591 instead of `Enc_utf8 for the
+ *              configuration field [encoding]
+ *   filename - the file handed to [from_file]
+ *
+ * On success "Parsed without error" is printed.  Any exception is caught:
+ * [error_happened] is set and the exception is printed via [print_error].
+ *)
+let parse debug wf iso88591 filename =
+ try
+ let config =
+ { default_config with
+ warner = new warner;
+ debugging_mode = debug;
+ encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
+ idref_pass = true;
+ }
+ in
+ (* Both alternatives are left partially applied here; the remaining
+  * arguments (config, source, spec) are supplied below.
+  *)
+ let parse_fn =
+ if wf then parse_wfdocument_entity
+ else
+ let index = new hash_index in
+ parse_document_entity
+ ?transform_dtd:None
+ ~id_index:(index :> 'ext index)
+ in
+ (* The resulting tree is not used afterwards; only whether parsing
+  * raised an exception matters to this test.
+  *)
+ let tree =
+ parse_fn
+ config
+ (from_file filename)
+ default_spec
+ in
+ print_endline "Parsed without error";
+ with
+ (* Deliberate catch-all: every failure is recorded and printed. *)
+ e ->
+ error_happened := true;
+ print_error e
+;;
+
+
+(* Entry point: evaluates the command-line switches and then runs [parse]
+ * on every file argument, in the order given on the command line.
+ *)
+let main() =
+  let debug = ref false
+  and wf = ref false
+  and iso88591 = ref false
+  and files = ref [] in
+  let remember file = files := file :: !files in
+  let spec =
+    [ "-d", Arg.Set debug, "turn debugging mode on";
+      "-wf", Arg.Set wf, "check only on well-formedness";
+      "-iso-8859-1", Arg.Set iso88591, "use ISO-8859-1 as internal encoding instead of UTF-8";
+    ]
+  in
+  Arg.parse spec remember
+      "
+usage: test_negative [options] file ...
+
+List of options:";
+  (* The anonymous arguments were collected in reverse order. *)
+  List.iter (parse !debug !wf !iso88591) (List.rev !files)
+;;
+
+
+(* Program body: process the command line, then leave with status 1 if any
+ * file produced an error.
+ *)
+let () =
+  main ();
+  if !error_happened then exit 1
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:33 lpadovan
+ * Initial revision
+ *
+ * Revision 1.6 2000/07/14 14:57:12 gerd
+ * Updated: warner
+ *
+ * Revision 1.5 2000/07/14 14:20:11 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.4 2000/07/09 01:49:09 gerd
+ * Updated because of PXP interface changes.
+ *
+ * Revision 1.3 2000/06/04 20:31:21 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/28 17:23:22 gerd
+ * Updated.
+ *
+ * Revision 1.1 2000/05/01 15:58:50 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+# make test_reader: make bytecode executable
+# (no native-code target is defined in this Makefile)
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_reader: test_reader.ml
+ ocamllex minilex.mll
+ ocamlfind ocamlc -custom -o test_reader -package .,unix,threads \
+ -linkpkg -thread -noautolink \
+ -g minilex.ml test_reader.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa minilex.ml
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f test_reader
+
--- /dev/null
+{ }
+(* [nextchar lexbuf] consumes exactly one character of the input and returns
+ * it as [Some c]; at the end of the input it returns [None].
+ *)
+rule nextchar = parse
+    eof
+      { None }
+  | _
+      { Some (Lexing.lexeme_char lexbuf 0) }
+{ }
--- /dev/null
+0123456789
\ No newline at end of file
--- /dev/null
+open Pxp_reader;;
+open Pxp_types;;
+open Minilex;;
+
+(* [make_channel s] returns an input channel that delivers the bytes of the
+ * string [s].  The channel is the read end of a pipe; a separate thread
+ * writes [s] into the write end and then closes it.
+ *)
+let make_channel s =
+  let (read_fd, write_fd) = Unix.pipe () in
+  let source = Unix.in_channel_of_descr read_fd in
+  let sink = Unix.out_channel_of_descr write_fd in
+  let feed () =
+    output_string sink s;
+    close_out sink
+  in
+  ignore (Thread.create feed ());
+  source
+;;
+
+(**********************************************************************)
+
+let t001 () =
+  (* Feeds a plain string to the resolver (no recoding is involved) and
+   * checks the fill state of the lexbuf after selected steps.
+   *)
+  let resolver = new resolve_read_this_string "0123456789abc" in
+  resolver # init_rep_encoding `Enc_iso88591;
+  resolver # init_warner (new drop_warnings);
+  let lexbuf = resolver # open_in Anonymous in
+  (* Reads and discards [n] characters. *)
+  let skip n = for _i = 1 to n do ignore (nextchar lexbuf) done in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Note: the end of lex_buffer is filled up, so lex_curr_pos being equal
+   * to lex_buffer_len indicates that the buffer is empty at this point.
+   *)
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  resolver # change_encoding "";
+  let eleventh = nextchar lexbuf in
+  assert (eleventh = Some 'a');
+  assert (lexbuf.Lexing.lex_curr_pos < lexbuf.Lexing.lex_buffer_len);
+  skip 1;
+  let last = nextchar lexbuf in
+  assert (last = Some 'c');
+  let past_end = nextchar lexbuf in
+  assert (past_end = None);
+  resolver # close_in;
+  true
+;;
+
+
+let t002 () =
+  (* Same scenario as t001, except that the input arrives through a channel
+   * instead of a string.
+   *)
+  let resolver =
+    new resolve_read_this_channel (make_channel "0123456789abc") in
+  resolver # init_rep_encoding `Enc_iso88591;
+  resolver # init_warner (new drop_warnings);
+  let lexbuf = resolver # open_in Anonymous in
+  (* Reads and discards [n] characters. *)
+  let skip n = for _i = 1 to n do ignore (nextchar lexbuf) done in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Note: the end of lex_buffer is filled up, so lex_curr_pos being equal
+   * to lex_buffer_len indicates that the buffer is empty at this point.
+   *)
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  resolver # change_encoding "";
+  let eleventh = nextchar lexbuf in
+  assert (eleventh = Some 'a');
+  assert (lexbuf.Lexing.lex_curr_pos < lexbuf.Lexing.lex_buffer_len);
+  skip 1;
+  let last = nextchar lexbuf in
+  assert (last = Some 'c');
+  let past_end = nextchar lexbuf in
+  assert (past_end = None);
+  resolver # close_in;
+  true
+;;
+
+
+let t003 () =
+  (* Tests non-automatic encoding conversion from ISO-8859-1 to UTF-8.
+   * The ISO-8859-1 input is written with decimal escapes so that its bytes
+   * do not depend on the character encoding of this source file; it spells
+   * the same characters as the UTF-8 string expected at the end.
+   *)
+  let s = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_iso88591 s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  (* Collect everything the lexer delivers, starting with [first]. *)
+  let collected = Buffer.create 128 in
+  let rec drain = function
+      Some ch -> Buffer.add_char collected ch; drain (nextchar lb)
+    | None -> ()
+  in
+  drain first;
+  r # close_in;
+  Buffer.contents collected = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t004 () =
+  (* Tests non-automatic encoding conversion from UTF-8 to ISO-8859-1.
+   * The expected ISO-8859-1 result is written with decimal escapes so that
+   * its bytes do not depend on the character encoding of this source file;
+   * it spells the same characters as the UTF-8 input.
+   *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string ~fixenc:`Enc_utf8 s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let first = nextchar lb in
+  assert (first = Some '0');
+  assert (lb.Lexing.lex_curr_pos < lb.Lexing.lex_buffer_len);
+  (* Note: because we initialize the resolver with ~fixenc, the resolver can
+   * fill the buffer with more than one byte from the beginning.
+   *)
+  (* Collect everything the lexer delivers, starting with [first]. *)
+  let collected = Buffer.create 128 in
+  let rec drain = function
+      Some ch -> Buffer.add_char collected ch; drain (nextchar lb)
+    | None -> ()
+  in
+  drain first;
+  r # close_in;
+  Buffer.contents collected = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209"
+;;
+
+
+let t005 () =
+  (* Tests automatic encoding conversion from UTF-8 to ISO-8859-1.
+   * No ~fixenc is passed here. Every 8-bit byte of the test data is
+   * written as a decimal escape, so the test does not depend on the
+   * encoding in which this source file happens to be stored.
+   *)
+  let s = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_iso88591;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  (* After the first character the lexer buffer must be used up completely *)
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+        Some x -> u := !u ^ String.make 1 x
+      | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  (* Expected result: the ISO-8859-1 byte of every character of s *)
+  !u = "0\171\187\176\225\224\226\227\228\193\192\194\195\196\233\232\234\235\237\236\238\239\205\204\206\207\243\242\244\245\248\246\211\210\212\213\216\214\250\249\251\252\253\255\221\223\231\161\191\241\209"
+;;
+
+
+let t006 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
+   * This variant invokes change_encoding early.
+   *
+   * The input starts with the byte order mark \254\255; every following
+   * character is a \000 byte plus its ISO-8859-1 code. All bytes are
+   * written as decimal escapes, so the test does not depend on the
+   * encoding in which this source file happens to be stored.
+   *)
+  let s = "\254\255\0000\000\171\000\187\000\176\000\225\000\224\000\226\000\227\000\228\000\193\000\192\000\194\000\195\000\196\000\233\000\232\000\234\000\235\000\237\000\236\000\238\000\239\000\205\000\204\000\206\000\207\000\243\000\242\000\244\000\245\000\248\000\246\000\211\000\210\000\212\000\213\000\216\000\214\000\250\000\249\000\251\000\252\000\253\000\255\000\221\000\223\000\231\000\161\000\191\000\241\000\209" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  r # change_encoding "";
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+        Some x -> u := !u ^ String.make 1 x
+      | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+
+let t007 () =
+  (* Tests automatic encoding conversion from UTF-16-BE to UTF-8
+   * This variant does not invoke change_encoding
+   *
+   * The input starts with the byte order mark \254\255; every following
+   * character is a \000 byte plus its ISO-8859-1 code. All bytes are
+   * written as decimal escapes, so the test does not depend on the
+   * encoding in which this source file happens to be stored.
+   *)
+  let s = "\254\255\0000\000\171\000\187\000\176\000\225\000\224\000\226\000\227\000\228\000\193\000\192\000\194\000\195\000\196\000\233\000\232\000\234\000\235\000\237\000\236\000\238\000\239\000\205\000\204\000\206\000\207\000\243\000\242\000\244\000\245\000\248\000\246\000\211\000\210\000\212\000\213\000\216\000\214\000\250\000\249\000\251\000\252\000\253\000\255\000\221\000\223\000\231\000\161\000\191\000\241\000\209" in
+  let r = new resolve_read_this_string s in
+  r # init_rep_encoding `Enc_utf8;
+  r # init_warner (new drop_warnings);
+  let lb = r # open_in Anonymous in
+  let c = ref (nextchar lb) in
+  assert (!c = Some '0');
+  assert (lb.Lexing.lex_curr_pos = lb.Lexing.lex_buffer_len);
+  let u = ref "" in
+  while !c <> None do
+    ( match !c with
+        Some x -> u := !u ^ String.make 1 x
+      | None -> ()
+    );
+    c := nextchar lb
+  done;
+  r # close_in;
+  !u = "0\194\171\194\187\194\176\195\161\195\160\195\162\195\163\195\164\195\129\195\128\195\130\195\131\195\132\195\169\195\168\195\170\195\171\195\173\195\172\195\174\195\175\195\141\195\140\195\142\195\143\195\179\195\178\195\180\195\181\195\184\195\182\195\147\195\146\195\148\195\149\195\152\195\150\195\186\195\185\195\187\195\188\195\189\195\191\195\157\195\159\195\167\194\161\194\191\195\177\195\145"
+;;
+
+(**********************************************************************)
+
+let t100 () =
+  (* Reads t100.dat without recoding it. The file is addressed by a
+   * complete URL of the form file://localhost<cwd>/t100.dat.
+   *)
+  let resolver = new resolve_as_file () in
+  resolver # init_rep_encoding `Enc_utf8;
+  resolver # init_warner (new drop_warnings);
+  let url = "file://localhost" ^ Sys.getcwd() ^ "/t100.dat" in
+  let lexbuf = resolver # open_in (System url) in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  (* The lexer buffer must be exhausted after the first character, i.e.
+   * the current position has reached the end of the filled part.
+   *)
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Skip the next eight characters *)
+  let rec skip n =
+    if n > 0 then begin
+      ignore (nextchar lexbuf);
+      skip (n - 1)
+    end
+  in
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  resolver # close_in;
+  true
+;;
+
+let t101 () =
+  (* Reads t100.dat without recoding it. The file is addressed by a URL
+   * without scheme: //localhost<cwd>/t100.dat.
+   *)
+  let resolver = new resolve_as_file () in
+  resolver # init_rep_encoding `Enc_utf8;
+  resolver # init_warner (new drop_warnings);
+  let url = "//localhost" ^ Sys.getcwd() ^ "/t100.dat" in
+  let lexbuf = resolver # open_in (System url) in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  (* The lexer buffer must be exhausted after the first character, i.e.
+   * the current position has reached the end of the filled part.
+   *)
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Skip the next eight characters *)
+  let rec skip n =
+    if n > 0 then begin
+      ignore (nextchar lexbuf);
+      skip (n - 1)
+    end
+  in
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  resolver # close_in;
+  true
+;;
+
+let t102 () =
+  (* Reads t100.dat without recoding it. The file is addressed by its
+   * absolute path <cwd>/t100.dat, without scheme and host.
+   *)
+  let resolver = new resolve_as_file () in
+  resolver # init_rep_encoding `Enc_utf8;
+  resolver # init_warner (new drop_warnings);
+  let path = Sys.getcwd() ^ "/t100.dat" in
+  let lexbuf = resolver # open_in (System path) in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  (* The lexer buffer must be exhausted after the first character, i.e.
+   * the current position has reached the end of the filled part.
+   *)
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Skip the next eight characters *)
+  let rec skip n =
+    if n > 0 then begin
+      ignore (nextchar lexbuf);
+      skip (n - 1)
+    end
+  in
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  resolver # close_in;
+  true
+;;
+
+let t103 () =
+  (* Reads t100.dat without recoding it. The file is addressed by the
+   * relative name "t100.dat".
+   *)
+  let resolver = new resolve_as_file () in
+  resolver # init_rep_encoding `Enc_utf8;
+  resolver # init_warner (new drop_warnings);
+  let lexbuf = resolver # open_in (System "t100.dat") in
+  let first = nextchar lexbuf in
+  assert (first = Some '0');
+  (* The lexer buffer must be exhausted after the first character, i.e.
+   * the current position has reached the end of the filled part.
+   *)
+  assert (lexbuf.Lexing.lex_curr_pos = lexbuf.Lexing.lex_buffer_len);
+  (* Skip the next eight characters *)
+  let rec skip n =
+    if n > 0 then begin
+      ignore (nextchar lexbuf);
+      skip (n - 1)
+    end
+  in
+  skip 8;
+  let tenth = nextchar lexbuf in
+  assert (tenth = Some '9');
+  resolver # close_in;
+  true
+;;
+
+(**********************************************************************)
+
+let t110 () =
+  (* Checks whether relative URLs are properly handled: while the first
+   * resolver has t100.dat open and has delivered nine characters, a clone
+   * of it opens the same relative name and is read up to the tenth
+   * character. Afterwards the first resolver must still deliver its own
+   * tenth character.
+   *)
+  let outer = new resolve_as_file () in
+  outer # init_rep_encoding `Enc_utf8;
+  outer # init_warner (new drop_warnings);
+  let outer_buf = outer # open_in (System "t100.dat") in
+  let first = nextchar outer_buf in
+  assert (first = Some '0');
+  (* The lexer buffer must be exhausted after the first character, i.e.
+   * the current position has reached the end of the filled part.
+   *)
+  assert (outer_buf.Lexing.lex_curr_pos = outer_buf.Lexing.lex_buffer_len);
+  let rec skip buf n =
+    if n > 0 then begin
+      ignore (nextchar buf);
+      skip buf (n - 1)
+    end
+  in
+  skip outer_buf 8;
+  (* Now read the same file completely through a clone *)
+  let inner = outer # clone in
+  let inner_buf = inner # open_in (System "t100.dat") in
+  let inner_first = nextchar inner_buf in
+  assert (inner_first = Some '0');
+  skip inner_buf 8;
+  let inner_tenth = nextchar inner_buf in
+  assert (inner_tenth = Some '9');
+  inner # close_in;
+  (* The original resolver continues where it stopped *)
+  let tenth = nextchar outer_buf in
+  assert (tenth = Some '9');
+  outer # close_in;
+  true
+;;
+
+(**********************************************************************)
+(* Tests whether the encoding handling of System IDs is okay *)
+
+let t200 () =
+  (* Checks the technique used by the following tests (and, to some
+   * extent, also 'combine'): two string resolvers with different system
+   * IDs are combined, and the document a.xml refers to the entity b.xml.
+   *)
+  let entity_resolver =
+    new resolve_read_this_string
+      ~id:(System "b.xml")
+      ~fixenc:`Enc_iso88591
+      "ae" in
+  let document_resolver =
+    new resolve_read_this_string
+      ~id:(System "a.xml")
+      ~fixenc:`Enc_iso88591
+      "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM 'b.xml'> ]> <a>&ae;</a>" in
+  let combined = new combine [ entity_resolver; document_resolver ] in
+  let config =
+    { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 } in
+  let source = Pxp_yacc.ExtID(System "a.xml", combined) in
+  (* It should now be possible to resolve &ae; - the parser raises an
+   * exception otherwise.
+   *)
+  ignore (Pxp_yacc.parse_document_entity config source Pxp_yacc.default_spec);
+  true
+;;
+
+
+let t201 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = ISO-8859-1
+   *
+   * The document is declared as ISO-8859-1 and refers to the system ID
+   * consisting of the single byte \228 (a-umlaut) followed by ".xml". The
+   * byte is written as a decimal escape so that the test does not depend
+   * on the encoding in which this source file happens to be stored.
+   *)
+  let r1 = new resolve_read_this_string
+             ~id:(System "\195\164.xml")       (* a-umlaut encoded as UTF-8 *)
+             ~fixenc:`Enc_iso88591
+             "ae" in
+  let r2 = new resolve_read_this_string
+             ~id:(System "a.xml")
+             ~fixenc:`Enc_iso88591
+             "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM '\228.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_iso88591 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+
+let t202 () =
+  (* Check that System IDs are converted to UTF-8. rep_encoding = UTF-8
+   *
+   * The document is declared as ISO-8859-1 and refers to the system ID
+   * consisting of the single byte \228 (a-umlaut) followed by ".xml". The
+   * byte is written as a decimal escape so that the test does not depend
+   * on the encoding in which this source file happens to be stored.
+   *)
+  let r1 = new resolve_read_this_string
+             ~id:(System "\195\164.xml")       (* a-umlaut encoded as UTF-8 *)
+             ~fixenc:`Enc_iso88591
+             "ae" in
+  let r2 = new resolve_read_this_string
+             ~id:(System "a.xml")
+             ~fixenc:`Enc_iso88591
+             "<!DOCTYPE a [ <!ELEMENT a ANY> <!ENTITY ae SYSTEM '\228.xml'> ]> <a>&ae;</a>" in
+  let r = new combine [ r1; r2 ] in
+  (* It should now be possible to resolve &ae; *)
+  let _ =
+    Pxp_yacc.parse_document_entity
+      { Pxp_yacc.default_config with Pxp_yacc.encoding = `Enc_utf8 }
+      (Pxp_yacc.ExtID(System "a.xml", r))
+      Pxp_yacc.default_spec
+  in
+  true
+;;
+
+(**********************************************************************)
+
+let test f n =
+  (* Runs the test function f and reports the outcome on stdout, labelled
+   * with the test number n. A result of false and a raised exception are
+   * both reported as failures; the exception is not propagated.
+   *)
+  try
+    print_string ("Reader test " ^ n);
+    flush stdout;
+    let verdict = if f() then " ok" else " FAILED!!!!" in
+    print_endline verdict
+  with
+    error ->
+      print_endline (" FAILED: " ^ string_of_exn error)
+;;
+
+(* Run all reader tests in order: recoding tests, file access tests, the
+ * clone test, and the system ID tests.
+ *)
+List.iter
+  (fun (f, n) -> test f n)
+  [ t001, "001";
+    t002, "002";
+    t003, "003";
+    t004, "004";
+    t005, "005";
+    t006, "006";
+    t007, "007";
+
+    t100, "100";
+    t101, "101";
+    t102, "102";
+    t103, "103";
+
+    t110, "110";
+
+    t200, "200";
+    t201, "201";
+    t202, "202";
+  ];;
--- /dev/null
+#! /bin/sh
+
+set -e
+
+(cd reader && ./test_reader)
+(cd canonxml && ./run_canonxml)
+(cd write && ./run_write)
+(cd codewriter && ./run_codewriter)
+(cd negative && ./run_negative)
--- /dev/null
+# make validate: make bytecode executable
+# make validate.opt: make native executable
+# make clean: remove intermediate files (in this directory)
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superflous files (recursively)
+#----------------------------------------------------------------------
+
+OCAMLPATH=../..
+
+test_write: test_write.ml
+ ocamlfind ocamlc -g -custom -o test_write -package .,str -linkpkg test_write.ml
+
+#----------------------------------------------------------------------
+.PHONY: all
+all:
+
+.PHONY: clean
+clean:
+ rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out1 out2 out3
+
+.PHONY: CLEAN
+CLEAN: clean
+
+.PHONY: distclean
+distclean: clean
+ rm -f *~
+ rm -f test_write
+
--- /dev/null
+#! /bin/bash
+
+test_sample () {
+ file="$1"
+ echo -n "Testing $file... "
+ ./test_write -in "$file" >out1
+ ./test_write -in out1 >out2
+ ./test_write -in out2 >out3
+ if cmp out1 out3; then
+ echo "OK"
+ else
+ echo "FAILED"
+ fi
+}
+
+
+test_sample "sample001.xml"
--- /dev/null
+<!DOCTYPE a [
+
+<!ELEMENT a (b | (c, d)* | (e, f)+ | g?)>
+<!ELEMENT b (#PCDATA | a)*>
+<!ELEMENT c EMPTY>
+<!ELEMENT d ANY>
+<!ELEMENT e EMPTY>
+<!ELEMENT f EMPTY>
+<!ELEMENT g EMPTY>
+
+<!ATTLIST a u CDATA #IMPLIED
+ v NMTOKEN "huhu"
+ w (q|p) #REQUIRED
+ x NOTATION (n1|n2) "n1"
+ y ENTITY #IMPLIED>
+
+<!NOTATION n1 SYSTEM "/bin/n1-processor">
+<!NOTATION n2 SYSTEM "/bin/n2-processor">
+
+<!ENTITY u1 SYSTEM "file-u1" NDATA n1>
+<!ENTITY u2 SYSTEM "file-u2" NDATA n2>
+
+<?pi1 args ...?>
+]>
+
+<a u="1" w="q" x="n2">
+ <b>
+ <?pi2 args ...?>
+ This is text!
+ <a w="p" y="u1">
+ <c/>
+ <d/>
+ </a>
+ </b>
+</a>
+
+<?pi3 args ...?>
--- /dev/null
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+
+open Pxp_document;;
+open Pxp_yacc;;
+open Pxp_types;;
+
+let error_happened = ref false;;
+
+(* Prints the textual form of the exception e on stderr *)
+let prerr_error e =
+  let message = string_of_exn e in
+  prerr_endline message
+;;
+
+(* Warning collector that writes every warning to stderr, prefixed with
+ * "WARNING: ".
+ *)
+class warner =
+  object
+    method warn msg =
+      prerr_string "WARNING: ";
+      prerr_endline msg
+  end
+;;
+
+let parse_and_write in_filename =
+  (* Parses the XML document in_filename and writes the resulting tree to
+   * stdout as UTF-8. Any exception is printed on stderr and recorded in
+   * error_happened instead of being propagated.
+   *)
+  let exemplar = new element_impl default_extension in
+  (* The same element exemplar is used for all node types but data nodes *)
+  let spec =
+    make_spec_from_mapping
+      ~super_root_exemplar: exemplar
+      ~default_pinstr_exemplar: exemplar
+      ~data_exemplar: (new data_impl default_extension)
+      ~default_element_exemplar: exemplar
+      ~element_mapping: (Hashtbl.create 1)
+      ()
+  in
+  let config =
+    { default_config with
+        warner = new warner;
+        enable_pinstr_nodes = true;
+        enable_super_root_node = true;
+        encoding = `Enc_utf8;
+    }
+  in
+  try
+    let source = from_file in_filename in
+    let tree = parse_document_entity config source spec in
+    tree # write (Out_channel stdout) `Enc_utf8
+  with
+    e ->
+      error_happened := true;
+      prerr_error e
+;;
+
+
+let main() =
+  (* Command line driver: expects "-in <file>", rejects anonymous
+   * arguments, and hands the file over to parse_and_write.
+   *)
+  let in_file = ref "" in
+  let options =
+    [ "-in", (Arg.String (fun s -> in_file := s)),
+        " <file> Set the XML file to read";
+    ]
+  in
+  let reject_anonymous _ = raise (Arg.Bad "Unexpected argument") in
+  Arg.parse
+    options
+    reject_anonymous
+    "
+usage: test_write [ options ]
+
+List of options:";
+  match !in_file with
+    "" ->
+      prerr_endline "No input file specified.";
+      exit 1
+  | name ->
+      parse_and_write name
+;;
+
+
+(* Program entry: run main, then turn a recorded error into exit code 1 *)
+let () =
+  main();
+  if !error_happened then exit 1
+;;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/08/16 23:44:21 gerd
+ * Updates because of changes of the PXP API.
+ *
+ * Revision 1.1 2000/07/16 17:50:39 gerd
+ * Initial revision.
+ *
+ *)
--- /dev/null
+#! /bin/sh
+#
+# $Id$
+# ----------------------------------------------------------------------
+#
+# usage: collect_files file ...
+#
+# Prints the names of the files passed as arguments which actually
+# exist and are regular files.
+
+# Walk over the arguments one by one; print those naming regular files.
+while [ $# -gt 0 ]; do
+    if test -f "$1"; then
+        echo "$1"
+    fi
+    shift
+done
+
+# ======================================================================
+#
+# $Log$
+# Revision 1.1 2000/11/17 09:57:35 lpadovan
+# Initial revision
+#
+# Revision 1.1 2000/07/27 21:07:26 gerd
+# Initial revision.
+#
--- /dev/null
+#! /bin/sh
+# (*
+exec ocaml "$0" "$@"
+*) directory ".";;
+
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+let get_arg variant insert_line =
+  (* Returns the argument of an "#insert" line: everything after the first
+   * eight characters, with blanks removed and every '*' replaced by
+   * variant.
+   *)
+  let buf = Buffer.create 32 in
+  for pos = 8 to String.length insert_line - 1 do
+    match insert_line.[pos] with
+      ' ' -> ()
+    | '*' -> Buffer.add_string buf variant
+    | other -> Buffer.add_char buf other
+  done;
+  Buffer.contents buf
+;;
+
+
+let edit_file variant name =
+  (* Copies the file name (which must end in ".src") to the file
+   * <basename>_<variant>.mll. Every line beginning with "#insert " is
+   * replaced by the contents of the file named on that line (see get_arg);
+   * all other lines are copied unchanged.
+   *
+   * Raises Invalid_argument if name does not have the suffix ".src", and
+   * Sys_error if one of the files cannot be opened.
+   *)
+  let basename = Filename.chop_suffix name ".src" in
+  let mllname = basename ^ "_" ^ variant ^ ".mll" in
+  let chin = open_in name in
+  let chout = open_out mllname in
+  output_string chout "(* File generated by insert_variant; DO NOT EDIT! *)\n";
+  begin try
+    while true do
+      let line = input_line chin in
+      (* We do not have Str here. *)
+      if String.length line >= 8 && String.sub line 0 8 = "#insert " then begin
+        let insname = get_arg variant line in
+        (* Copy the file 'insname' to chout. This is done character by
+         * character on the buffered channels, which needs no mutable
+         * string buffer.
+         *)
+        let chcopy = open_in insname in
+        begin try
+          while true do
+            output_char chout (input_char chcopy)
+          done
+        with
+          End_of_file -> ()
+        end;
+        close_in chcopy;
+      end
+      else begin
+        output_string chout line;
+        output_char chout '\n';
+      end
+    done
+  with
+    End_of_file -> ()
+  end;
+  close_in chin;
+  close_out chout
+;;
+
+
+let main() =
+  (* Command line driver: "-variant <name>" selects the variant, all
+   * anonymous arguments are .src files which are processed in the order
+   * given. Fails if no variant was specified.
+   *)
+  let variant = ref "" in
+  let rev_files = ref [] in        (* collected in reverse order *)
+  let options =
+    [ "-variant", Arg.String (fun s -> variant := s),
+                  "<name> Set the variant (character encoding)";
+    ]
+  in
+  Arg.current := 0;      (* Because of a OCaml-3.00 bug *)
+  Arg.parse
+    options
+    (fun s -> rev_files := s :: !rev_files)
+    "insert_variant [ options ] file.src ...
+
+Reads the files, replaces the #insert lines by the referred files, and
+writes the file file_variant.mll.
+
+The #insert lines include the specified file into the source. The
+asterisk (*) is replaced by the name of the variant.
+
+Options:
+";
+
+  if !variant = "" then
+    failwith "No variant specified!";
+
+  let selected = !variant in
+  List.iter
+    (fun name -> edit_file selected name)
+    (List.rev !rev_files)
+;;
+
+
+main();;
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:35 lpadovan
+ * Initial revision
+ *
+ * Revision 1.2 2000/05/20 21:14:33 gerd
+ * Workaround for an OCaml 3.00 bug.
+ *
+ * Revision 1.1 2000/05/20 20:30:15 gerd
+ * Initial revision.
+ *
+ *
+ *)
--- /dev/null
+*.cmo
+*.cmx
+*.cmi
+
--- /dev/null
+#(******************************************************)
+#(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+#(* 14/05/2000 *)
+#(******************************************************)
+
+OCAMLC = ocamlc
+OCAMLOPT = ocamlopt
+OCAMLDEP = ocamldep
+OCAMLLEX = ocamllex
+OCAMLYACC = ocamlyacc
+
+all: ucs2_to_utf8
+opt: ucs2_to_utf8.opt
+
+DEPOBJS = ucs2_to_utf8.ml lexer.ml parser.ml parser.mli types.ml
+
+UCS2_TO_UTF8OBJS = types.cmo lexer.cmo parser.cmo ucs2_to_utf8.cmo
+UCS2_TO_UTF8OPTOBJS = types.cmx lexer.cmx parser.cmx ucs2_to_utf8.cmx
+
+lexer.ml:
+ $(OCAMLLEX) lexer.mll
+
+parser.ml:
+ $(OCAMLYACC) parser.mly
+
+parser.mli:
+ $(OCAMLYACC) parser.mly
+
+depend: lexer.ml parser.ml parser.mli
+ $(OCAMLDEP) $(DEPOBJS) > depend
+
+ucs2_to_utf8: $(UCS2_TO_UTF8OBJS)
+ $(OCAMLC) -o ucs2_to_utf8 $(UCS2_TO_UTF8OBJS)
+
+ucs2_to_utf8.opt: $(UCS2_TO_UTF8OPTOBJS)
+ $(OCAMLOPT) -o ucs2_to_utf8.opt $(UCS2_TO_UTF8OPTOBJS)
+
+.SUFFIXES: .ml .mli .cmo .cmi .cmx
+.ml.cmo:
+ $(OCAMLC) -c $<
+.mli.cmi:
+ $(OCAMLC) -c $<
+.ml.cmx:
+ $(OCAMLOPT) -c $<
+
+clean:
+ rm -f *.cm[iox] *.o lexer.ml parser.ml parser.mli \
+ ucs2_to_utf8 ucs2_to_utf8.opt
+
+include depend
--- /dev/null
+(******************************************************)
+(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+(* 14/05/2000 *)
+(******************************************************)
+
+How to compile: "make clean && make depend && make && make opt"
+
+Usage: "cat input.mll | ./ucs2_to_utf8 > output.mll"
+ where in input.mll there are definitions of ucs2 regular expressions
+ and in output.mll there are the same utf8 regular expressions in the
+ format expected by ocamllex
+
+ See input/input.mll for an example (the definitions are taken from the
+       appendix B of the XML recommendation) and input/example.mll for a
+ smaller one.
--- /dev/null
+{
+(******************************************************)
+(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+(* 14/05/2000 *)
+(******************************************************)
+
+open Parser
+
+let comment_depth = ref 0;;
+
+(* Converts a lexeme of the form "#xHHHH" into the integer it denotes.
+ * The leading '#' is exchanged for '0', which gives the hexadecimal
+ * literal "0xHHHH" understood by int_of_string. A new string is built for
+ * this purpose; the lexeme itself is not modified.
+ * Raises Invalid_argument for the empty string and Failure if the rest
+ * is not a valid number.
+ *)
+let charint_of_lexeme l =
+  int_of_string ("0" ^ String.sub l 1 (String.length l - 1))
+;;
+}
+
+(* One hexadecimal digit. Only the upper-case letters A-F are accepted. *)
+let digit = ['0'-'9']|['A'-'F']
+
+(* Main entry point: skips blanks, tabs and newlines and returns the next
+ * token of the definition language. An opening comment delimiter switches
+ * to the rule comment, which resumes with token once the comment is
+ * closed.
+ *)
+rule token =
+ parse
+    [' ' '\t' '\n']                            { token lexbuf }
+  | "let"                                      { LET }
+  (* Identifiers start with a lower-case letter or an underscore *)
+  | (['a'-'z']|'_')(['a'-'z']|['A'-'Z']|'_'|['0'-'9']|'\'')*
+                                               { IDENT (Lexing.lexeme lexbuf) }
+  | '='                                        { EQ }
+  | ";;"                                       { END_OF_LET }
+  | "|"                                        { PIPE }
+  | '['                                        { LBRACKET }
+  | ']'                                        { RBRACKET }
+  | '-'                                        { RANGE }
+  | "(*"                                       { incr comment_depth ;
+                                                 comment lexbuf
+                                               }
+  (* A character code: #x followed by exactly four hexadecimal digits *)
+  | "#x" digit digit digit digit               { CHAR (charint_of_lexeme (Lexing.lexeme lexbuf)) }
+  | eof                                        { EOF }
+
+(* Skips a possibly nested comment; comment_depth counts the delimiters
+ * that are still open. There is no case for eof in this rule.
+ *)
+and comment =
+ parse
+    "(*"                                       { incr comment_depth ; comment lexbuf }
+  | "*)"                                       { decr comment_depth ;
+                                                 if !comment_depth = 0 then token lexbuf else comment lexbuf
+                                               }
+  | _                                          { comment lexbuf }
--- /dev/null
+/******************************************************/
+/* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> */
+/* 14/05/2000 */
+/******************************************************/
+
+%token <int>CHAR
+%token <string>IDENT
+%token LET
+%token EQ
+%token END_OF_LET
+%token RBRACKET
+%token PIPE
+%token LBRACKET
+%token RANGE
+%token EOF
+%start main
+%type <Types.definition list> main
+
+%%
+
+/* Entry point: a possibly empty sequence of declarations followed by EOF.
+   The declarations are returned as a list in source order. */
+main:
+   EOF { [] }
+ | declaration main { $1::$2 }
+;
+
+/* One definition:  let IDENT = regexp ;;
+   The result is a record holding the name and the list of alternatives. */
+declaration:
+   LET IDENT EQ regexp END_OF_LET
+      { { Types.id = $2 ; Types.rel = $4 } }
+;
+
+/* One or more alternatives separated by PIPE, collected into a list */
+regexp:
+   regexptoken PIPE regexp  { $1::$3 }
+ | regexptoken              { [$1] }
+;
+
+/* A single alternative: a character code, an interval written as
+   [CHAR-CHAR], or the name of another definition */
+regexptoken:
+   CHAR                                { Types.Char $1 }
+ | LBRACKET CHAR RANGE CHAR RBRACKET   { Types.Interval ($2,$4) }
+ | IDENT                               { Types.Identifier $1 }
+;
--- /dev/null
+(******************************************************)
+(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+(* 14/05/2000 *)
+(******************************************************)
+
+(* The regular expressions handled by the tool *)
+type regexp =
+   Char of int                   (* a single character code *)
+ | Interval of int * int         (* lower bound, upper bound *)
+ | Identifier of string          (* a name, as written in the input *)
+ | Concat of regexp list list    (* concatenation of disjunctions *)
+;;
+
+(* A named definition: id is the defined name, rel the list of its
+ * alternatives (a disjunction).
+ *)
+type definition = { id : string ; rel : regexp list } ;;
--- /dev/null
+(******************************************************)
+(* Claudio Sacerdoti Coen <sacerdot@cs.unibo.it> *)
+(* 14/05/2000 *)
+(******************************************************)
+
+(* Surrogate Pairs are not accepted in XML files (is it true???) *)
+exception SurrogatePairs;;
+
+(* Interval (n,m) where n > m *)
+exception InvalidInterval of int * int;;
+
+(* Given an ucs2 character code, returns it in utf8 *)
+(* (as a concatenation of characters) *)
+let char_ucs2_to_utf8 n =
+  (* Codes up to 0x7F are returned as a single Char. Codes up to 0x7FF
+   * become a Concat of two one-element lists, all other codes a Concat of
+   * three. Codes in the range 0xD800-0xDFFF raise SurrogatePairs.
+   *)
+  (* continuation v: byte 10xxxxxx carrying the six low bits of v *)
+  let continuation v =
+    [Types.Char ((v land 0b00111111) lor 0b10000000)]
+  in
+  if n >= 0xD800 && n <= 0xDFFF then
+    raise SurrogatePairs
+  else if n <= 0x007F then
+    Types.Char n
+  else if n <= 0x07FF then
+    Types.Concat
+      [ [Types.Char (((n lsr 6) land 0b00011111) lor 0b11000000)] ;
+        continuation n ]
+  else
+    Types.Concat
+      [ [Types.Char (((n lsr 12) land 0b00001111) lor 0b11100000)] ;
+        continuation (n lsr 6) ;
+        continuation n ]
+;;
+
+(*CSC: Two functions for debugging purposes only
+
+let char_ucs2_to_utf8 =
+ function
+ n when n >= 0xD800 && n <= 0xDFFF -> assert false
+ | n when n <= 0x007F -> [[n]]
+ | n when n <= 0x07FF ->
+ [[(n lsr 6 land 0b00011111 lor 0b11000000)] ;
+ [(n land 0b00111111 lor 0b10000000)]]
+ | n ->
+ [[(n lsr 12 land 0b00001111 lor 0b11100000)] ;
+ [(n lsr 6 land 0b00111111 lor 0b10000000)] ;
+ [(n land 0b00111111 lor 0b10000000)]]
+;;
+
+let rec bprint =
+ function
+ 0 -> ""
+ | n -> bprint (n / 2) ^ string_of_int (n mod 2)
+;;
+*)
+
+(* A few useful functions *)
+(* mklist e n returns the list consisting of n copies of e.
+ * n must not be negative (the recursion would not terminate).
+ *)
+let rec mklist e n =
+  if n = 0 then [] else e :: mklist e (n - 1)
+;;
+
+(* sup n: the largest tail of n continuation bytes, i.e. n times the byte
+ * 0b10111111. For n = 1 a plain Char is returned, otherwise a Concat of n
+ * one-element lists.
+ *)
+let sup n =
+  let top = Types.Char 0b10111111 in
+  if n = 1 then top else Types.Concat (mklist [top] n)
+;;
+
+(* inf n: the smallest tail of n continuation bytes, as a list of n
+ * one-element lists each holding the byte 0b10000000. (For n = 1 this is
+ * the same value the original special case returned.)
+ *)
+let inf n =
+  let bottom = Types.Char 0b10000000 in
+  mklist [bottom] n
+;;
+
+(* Successor of the code held in a one-element list [Char n]; any other
+ * argument is a programming error.
+ *)
+let mysucc head =
+  match head with
+    [Types.Char code] -> code + 1
+  | _ -> assert false
+;;
+
+(* Predecessor of the code held in a one-element list [Char n]; any other
+ * argument is a programming error.
+ *)
+let mypred head =
+  match head with
+    [Types.Char code] -> code - 1
+  | _ -> assert false
+;;
+
+(* Given two utf8-encoded extremes of an interval character code *)
+(* whose 'length' is the same, it returns the utf8 regular expression *)
+(* matching all the characters in the interval *)
+(* The argument is the pair (lower extreme, upper extreme), both in the
+ * form produced by char_ucs2_to_utf8; the result is a list of
+ * alternatives. If the head bytes differ, the interval is split into up
+ * to three parts: the lower head byte with all tails from the lower tail
+ * up to the maximal tail, the head bytes strictly in between with
+ * arbitrary continuation bytes, and the upper head byte with all tails
+ * from the minimal tail up to the upper tail.
+ *)
+let rec same_length_ucs2_to_utf8 =
+ let module T = Types in
+  function
+     (* One byte each *)
+     (T.Char n, T.Char m) when n = m -> [T.Char n]
+   | (T.Char n, T.Char m) -> [T.Interval (n,m)]
+     (* Two bytes each, same head byte: only the tails differ *)
+   | (T.Concat [hen ; [tln]], T.Concat [hem ; [tlm]]) when hen = hem ->
+      [T.Concat [hen ; same_length_ucs2_to_utf8 (tln,tlm)]]
+     (* Two bytes each, different head bytes *)
+   | (T.Concat [hen ; [tln]], T.Concat ([hem ; [tlm]] as e2)) ->
+      (* lower head byte, tails from tln up to the maximal tail *)
+      (T.Concat [hen ; same_length_ucs2_to_utf8 (tln,sup 1)]) ::
+       (let shen = mysucc hen
+        and phem = mypred hem in
+        let succhen = [T.Char shen] in
+         if succhen = hem then
+          (* adjacent head bytes: nothing in between *)
+          same_length_ucs2_to_utf8 (T.Concat (succhen::(inf 1)), T.Concat e2)
+         else
+          (* head bytes in between, with any continuation byte *)
+          (T.Concat [[T.Interval (shen, phem)] ;
+           [T.Interval (0b10000000,0b10111111)]])::
+            (* upper head byte, tails from the minimal tail up to tlm *)
+            same_length_ucs2_to_utf8 (T.Concat (hem::(inf 1)), T.Concat e2)
+       )
+    (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf 1)), T.Concat e2)*)
+     (* More than two bytes each, same head byte: recurse on the tails *)
+   | (T.Concat (hen::tln), T.Concat (hem::tlm)) when hen = hem ->
+      [T.Concat [hen ; same_length_ucs2_to_utf8 (T.Concat tln, T.Concat tlm)]]
+     (* More than two bytes each, different head bytes *)
+   | (T.Concat (hen::tln), T.Concat ((hem::tlm) as e2)) ->
+      let n = List.length tln in
+       (T.Concat
+        [hen ; same_length_ucs2_to_utf8 (T.Concat tln,sup n)]) ::
+         (let shen = mysucc hen
+          and phem = mypred hem in
+          let succhen = [T.Char shen] in
+           if succhen = hem then
+            same_length_ucs2_to_utf8 (T.Concat (succhen::(inf n)), T.Concat e2)
+           else
+            (* NOTE(review): the middle part always has exactly two
+             * continuation bytes, whatever n is. This fits the
+             * three-byte sequences, which are the longest ones
+             * char_ucs2_to_utf8 produces.
+             *)
+            (T.Concat [[T.Interval (shen, phem)] ;
+             [T.Interval (0b10000000,0b10111111)] ;
+             [T.Interval (0b10000000,0b10111111)]]
+            )::
+             same_length_ucs2_to_utf8 (T.Concat (hem::(inf n)), T.Concat e2)
+       )
+    (*same_length_ucs2_to_utf8 (T.Concat ((mysucc hen)::(inf n)),T.Concat e2)*)
+     (* The two extremes do not have the same shape *)
+   | _ -> assert false
+;;
+
+(* Given an interval of ucs2 characters, splits *)
+(* the list in subintervals whose extremes has *)
+(* the same utf8 encoding length and, for each *)
+(* extreme, calls same_length_ucs2_to_utf8 *)
+let rec seq_ucs2_to_utf8 (lo, hi) =
+  (* Raises SurrogatePairs if one of the extremes lies in the range
+   * 0xD800-0xDFFF, and InvalidInterval if lo > hi. Intervals crossing the
+   * boundaries 0x7F/0x80 or 0x7FF/0x800 are split there, so that
+   * same_length_ucs2_to_utf8 only sees extremes of equal length.
+   *)
+  let is_surrogate code = code >= 0xD800 && code <= 0xDFFF in
+  if is_surrogate lo || is_surrogate hi then
+    raise SurrogatePairs
+  else if lo > hi then
+    raise (InvalidInterval (lo,hi))
+  else if lo = hi then
+    [char_ucs2_to_utf8 lo]
+  else if lo <= 0x07F && hi > 0x07F then
+    (seq_ucs2_to_utf8 (lo,0x07F)) @ (seq_ucs2_to_utf8 (0x080,hi))
+  else if lo <= 0x07FF && hi > 0x07FF then
+    (seq_ucs2_to_utf8 (lo,0x07FF)) @ (seq_ucs2_to_utf8 (0x0800,hi))
+  else
+    same_length_ucs2_to_utf8 (char_ucs2_to_utf8 lo, char_ucs2_to_utf8 hi)
+;;
+
+(* Given an ucs2 regular expression, returns *)
+(* the corresponding utf8 regular expression *)
+let ucs2_to_utf8 { Types.id = id ; Types.rel = rel } =
+  (* convert re acc: puts the translation of re in front of acc. Single
+   * characters and intervals are translated, identifiers are kept as they
+   * are, and a Concat is translated component by component.
+   *)
+  let rec convert re acc =
+    match re with
+      Types.Char code -> char_ucs2_to_utf8 code :: acc
+    | Types.Interval (lo,hi) -> seq_ucs2_to_utf8 (lo,hi) @ acc
+    | Types.Identifier _ -> re :: acc
+    | Types.Concat groups ->
+        let convert_group group = List.fold_right convert group [] in
+        Types.Concat (List.map convert_group groups) :: acc
+  in
+  { Types.id = id ; Types.rel = List.fold_right convert rel [] }
+;;
+
+(* The function actually used to produce the output *)
+let output = print_string ;;
+
+(* padded_string_of_int i returns the string representing the *)
+(* integer i (i < 256) using exactly 3 digits (example: 13 -> "013") *)
+let padded_string_of_int i =
+  (* Choose the zeros to put in front, then append the plain number *)
+  let padding =
+    if i < 10 then "00"
+    else if i < 100 then "0"
+    else ""
+  in
+  padding ^ string_of_int i
+;;
+
+(* Two functions useful to print a definition *)
+(* print_disjunction prints the alternatives separated by " | "; the
+ * optional argument first tells whether the next alternative is the first
+ * one (no separator in front of it).
+ * print_re prints a single regular expression in ocamllex syntax;
+ * character codes are written as three-digit decimal escapes.
+ *)
+let rec print_disjunction ?(first = true) alternatives =
+  match alternatives with
+    [] -> ()
+  | re :: rest ->
+      if not first then output " | " ;
+      print_re re ;
+      print_disjunction ~first:false rest
+and print_re re =
+  match re with
+    Types.Char code ->
+      output ("'\\" ^ padded_string_of_int code ^ "'")
+  | Types.Interval (lo,hi) ->
+      output ("['\\" ^ padded_string_of_int lo ^ "'-'\\" ^
+              padded_string_of_int hi ^ "']")
+  | Types.Identifier name ->
+      output name
+  | Types.Concat groups ->
+      (* A component with two or more alternatives needs parentheses *)
+      let print_group group =
+        match group with
+          _ :: _ :: _ ->
+            output "(" ;
+            print_disjunction group ;
+            output ")"
+        | _ ->
+            print_disjunction group
+      in
+      List.iter print_group groups
+;;
+
+(* print_definition prints a definition in the format expected by ocamllex *)
+let print_definition { Types.id = id ; Types.rel = rel } =
+  (* Header line with the name, then the alternatives, then a blank line *)
+  let header = "let " ^ id ^ " =\n " in
+  output header ;
+  print_disjunction rel ;
+  output "\n\n"
+;;
+
+(* main *)
+(* Reads all definitions from stdin, converts them, and only then prints
+ * the results to stdout.
+ *)
+let () =
+  let lexbuf = Lexing.from_channel stdin in
+  let definitions = Parser.main Lexer.token lexbuf in
+  let converted = List.map ucs2_to_utf8 definitions in
+  List.iter print_definition converted
+;;