From: Luca Padovani Date: Fri, 17 Nov 2000 09:57:23 +0000 (+0000) Subject: Initial revision X-Git-Tag: nogzip~172 X-Git-Url: http://matita.cs.unibo.it/gitweb/?p=helm.git;a=commitdiff_plain;h=c03d2c1fdab8d228cb88aaba5ca0f556318bebc5 Initial revision --- diff --git a/helm/DEVEL/pxp/.cvsignore b/helm/DEVEL/pxp/.cvsignore new file mode 100644 index 000000000..c1fcbc4ae --- /dev/null +++ b/helm/DEVEL/pxp/.cvsignore @@ -0,0 +1,7 @@ +*.cmo +*.cmx +*.cmi + +*.o +*.a + diff --git a/helm/DEVEL/pxp/netstring/.cvsignore b/helm/DEVEL/pxp/netstring/.cvsignore new file mode 100644 index 000000000..c1fcbc4ae --- /dev/null +++ b/helm/DEVEL/pxp/netstring/.cvsignore @@ -0,0 +1,7 @@ +*.cmo +*.cmx +*.cmi + +*.o +*.a + diff --git a/helm/DEVEL/pxp/netstring/LICENSE b/helm/DEVEL/pxp/netstring/LICENSE new file mode 100644 index 000000000..820032ee2 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/LICENSE @@ -0,0 +1,21 @@ +Copyright 1999 by Gerd Stolpmann + +The package "netstring" is copyright by Gerd Stolpmann. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the "netstring" software (the "Software"), to deal in the +Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +The Software is provided ``as is'', without warranty of any kind, express +or implied, including but not limited to the warranties of +merchantability, fitness for a particular purpose and noninfringement. 
+In no event shall Gerd Stolpmann be liable for any claim, damages or +other liability, whether in an action of contract, tort or otherwise, +arising from, out of or in connection with the Software or the use or +other dealings in the software. diff --git a/helm/DEVEL/pxp/netstring/META b/helm/DEVEL/pxp/netstring/META new file mode 100644 index 000000000..d422128ab --- /dev/null +++ b/helm/DEVEL/pxp/netstring/META @@ -0,0 +1,54 @@ +version = "0.9.3" +requires = "str" +description = "String processing for the Internet" + +archive(byte) = + "netstring.cma netmappings_iso.cmo netmappings_other.cmo" +archive(byte,toploop) = + "netstring.cma netmappings_iso.cmo netmappings_other.cmo + netstring_top.cmo" +archive(byte,mt) = + "netstring.cma netmappings_iso.cmo netmappings_other.cmo + netstring_mt.cmo" +archive(byte,mt,toploop) = + "netstring.cma netmappings_iso.cmo netmappings_other.cmo + netstring_mt.cmo netstring_top.cmo" +archive(native) = + "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx" +archive(native,mt) = + "netstring.cmxa netmappings_iso.cmx netmappings_other.cmx + netstring_mt.cmx" + +archive(byte,netstring_only_iso) = + "netstring.cma netmappings_iso.cmo" +archive(byte,toploop,netstring_only_iso) = + "netstring.cma netmappings_iso.cmo + netstring_top.cmo" +archive(byte,mt,netstring_only_iso) = + "netstring.cma netmappings_iso.cmo + netstring_mt.cmo" +archive(byte,mt,toploop,netstring_only_iso) = + "netstring.cma netmappings_iso.cmo + netstring_mt.cmo netstring_top.cmo" +archive(native,netstring_only_iso) = + "netstring.cmxa netmappings_iso.cmx" +archive(native,mt,netstring_only_iso) = + "netstring.cmxa netmappings_iso.cmx + netstring_mt.cmx" + +archive(byte,netstring_minimum) = + "netstring.cma" +archive(byte,toploop,netstring_minimum) = + "netstring.cma + netstring_top.cmo" +archive(byte,mt,netstring_minimum) = + "netstring.cma + netstring_mt.cmo" +archive(byte,mt,toploop,netstring_minimum) = + "netstring.cma + netstring_mt.cmo netstring_top.cmo" 
+archive(native,netstring_minimum) = + "netstring.cmxa" +archive(native,mt,netstring_minimum) = + "netstring.cmxa + netstring_mt.cmx" diff --git a/helm/DEVEL/pxp/netstring/Makefile b/helm/DEVEL/pxp/netstring/Makefile new file mode 100644 index 000000000..98f9ef013 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/Makefile @@ -0,0 +1,151 @@ +# make all: make bytecode archive +# make opt: make native archive +# make install: install bytecode archive, and if present, native archive +# make uninstall: uninstall package +# make clean: remove intermediate files +# make distclean: remove any superfluous files +# make release: cleanup, create archive, tag CVS module +# (for developers) + +#---------------------------------------------------------------------- +# specific rules for this package: + +OBJECTS = netstring_str.cmo \ + netencoding.cmo netbuffer.cmo netstream.cmo \ + mimestring.cmo cgi.cmo base64.cmo \ + nethtml_scanner.cmo nethtml.cmo \ + neturl.cmo \ + netmappings.cmo netconversion.cmo +XOBJECTS = $(OBJECTS:.cmo=.cmx) +ARCHIVE = netstring.cma +XARCHIVE = netstring.cmxa + +NAME = netstring +REQUIRES = str + +ISO_MAPPINGS = mappings/iso*.unimap +OTHER_MAPPINGS = mappings/cp*.unimap \ + mappings/adobe*.unimap \ + mappings/jis*.unimap \ + mappings/koi*.unimap \ + mappings/mac*.unimap \ + mappings/windows*.unimap + +all: $(ARCHIVE) \ + netstring_top.cmo netstring_mt.cmo \ + netmappings_iso.cmo netmappings_other.cmo + +opt: $(XARCHIVE) \ + netstring_mt.cmx \ + netmappings_iso.cmx netmappings_other.cmx + + +$(ARCHIVE): $(OBJECTS) + $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS) + +$(XARCHIVE): $(XOBJECTS) + $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS) + +netmappings_iso.ml: + $(MAKE) -C tools + test ! -d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \ + -o netmappings_iso.ml $(ISO_MAPPINGS) + +netmappings_other.ml: + $(MAKE) -C tools + test ! 
-d mappings || tools/unimap_to_ocaml/unimap_to_ocaml \ + -o netmappings_other.ml $(OTHER_MAPPINGS) + +#---------------------------------------------------------------------- +# general rules: + +OPTIONS = +OCAMLC = ocamlc $(DEBUG) $(OPTIONS) $(ROPTIONS) +OCAMLOPT = ocamlopt $(OPTIONS) $(ROPTIONS) +OCAMLLEX = ocamllex +OCAMLDEP = ocamldep $(OPTIONS) +OCAMLFIND = ocamlfind + +DEBUG = +# Invoke with: make DEBUG=-g + +depend: *.ml *.mli + $(OCAMLDEP) *.ml *.mli >depend + +depend.pkg: Makefile + $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg + +.PHONY: install +install: all + { test ! -f $(XARCHIVE) || extra="*.cmxa *.a netstring_mt.cmx netmappings_iso.cmx netmappings_other.cmx netstring_mt.o netmappings_iso.o netmappings_other.o"; }; \ + $(OCAMLFIND) install $(NAME) *.mli *.cmi *.cma netstring_top.cmo netstring_mt.cmo netmappings_iso.cmo netmappings_other.cmo META $$extra + +.PHONY: install-cgi +install-cgi: + $(OCAMLFIND) install cgi compat-cgi/META + + +.PHONY: install-base64 +install-base64: + $(OCAMLFIND) install base64 compat-base64/META + + +.PHONY: uninstall +uninstall: + $(OCAMLFIND) remove $(NAME) + +.PHONY: uninstall-cgi +uninstall-cgi: + $(OCAMLFIND) remove cgi + +.PHONY: uninstall-base64 +uninstall-base64: + $(OCAMLFIND) remove base64 + +.PHONY: clean +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa + test ! 
-d mappings || rm -f netmappings_iso.ml netmappings_other.ml + +.PHONY: distclean +distclean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa + rm -f *~ depend depend.pkg compat-cgi/*~ compat-base64/*~ + $(MAKE) -C tests distclean + $(MAKE) -C doc distclean + $(MAKE) -C tools distclean + +RELEASE: META + awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE + +.PHONY: dist +dist: RELEASE + r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*/doc/readme.dtd" --exclude="*/Mail" --exclude="*/mappings" $(NAME) + +.PHONY: tag-release +tag-release: RELEASE + r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r $(NAME) + +.PHONY: release +release: distclean + test -f netmappings_iso.ml + test -f netmappings_other.ml + $(MAKE) tag-release + $(MAKE) dist + +.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll + +.ml.cmx: + $(OCAMLOPT) -c -thread $< + +.ml.cmo: + $(OCAMLC) -c -thread $< + +.mli.cmi: + $(OCAMLC) -c $< + +.mll.ml: + $(OCAMLLEX) $< + +include depend +include depend.pkg diff --git a/helm/DEVEL/pxp/netstring/RELEASE b/helm/DEVEL/pxp/netstring/RELEASE new file mode 100644 index 000000000..965065db5 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/RELEASE @@ -0,0 +1 @@ +0.9.3 diff --git a/helm/DEVEL/pxp/netstring/base64.ml b/helm/DEVEL/pxp/netstring/base64.ml new file mode 100644 index 000000000..285626f77 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/base64.ml @@ -0,0 +1,24 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +let encode s = Netencoding.Base64.encode s;; +let url_encode s = Netencoding.Base64.url_encode s;; +let decode s = Netencoding.Base64.decode s;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.2 2000/06/25 
22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.1 2000/03/02 01:15:20 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/base64.mli b/helm/DEVEL/pxp/netstring/base64.mli new file mode 100644 index 000000000..5dd60ea75 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/base64.mli @@ -0,0 +1,36 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(**********************************************************************) +(* Base64 compatibility module *) +(**********************************************************************) + +(* PLEASE DO NOT USE THIS MODULE IN NEW SOFTWARE! + * The module Netencoding.Base64 is the preferred API. This module is + * only for compatibility with older software. + *) + +(* This interface is compatible with all previously released Base64 + * modules (0.1 and 0.2). + *) + +val encode : string -> string + +val url_encode : string -> string + +val decode : string -> string + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.1 2000/03/02 01:15:20 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/cgi.ml b/helm/DEVEL/pxp/netstring/cgi.ml new file mode 100644 index 000000000..48412be29 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/cgi.ml @@ -0,0 +1,645 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +exception Resources_exceeded + +type argument_processing = Memory | File | Automatic;; + +type argument = + { mutable arg_name : string; + mutable arg_processing : argument_processing; + mutable arg_buf_value : Buffer.t; + mutable arg_mem_value : string option; + (* Here, the value is stored if it must be kept in memory *) + mutable arg_disk_value : string Weak.t; + (* This component is used iff arg_mem_value = None. 
The + * weak array has a length of 1, and the single element stores + * the value (if any). + *) + mutable arg_file : string option; + (* The filename of the temporary file storing the value *) + mutable arg_fd : out_channel option; + (* The file descriptor of the temp file (if open) *) + mutable arg_mimetype : string; + mutable arg_filename : string option; + mutable arg_header : (string * string) list; + (* For the last three components, see the description of the + * corresponding functions in the mli file. + *) + } +;; + +type workaround = + Work_around_MSIE_Content_type_bug + | Work_around_backslash_bug +;; + +type config = + { maximum_content_length : int; + how_to_process_arguments : argument -> argument_processing; + tmp_directory : string; + tmp_prefix : string; + workarounds : workaround list; + } +;; + + +(* Toploop printer for arguments: prints the argument name, the client-side + * filename (or a star if there is none), the MIME type, and where the value + * is stored (the word Memory, or the name of the temporary file). + * NOTE(review): the format string was empty in this dump, which cannot + * type-check against the four arguments that follow; the literal below is + * a reconstruction - confirm the exact text against upstream netstring. + *) +let print_argument arg = + Format.printf + "<CGIARG name=%s filename=%s mimetype=%s store=%s>" + arg.arg_name + (match arg.arg_filename with None -> "*" | Some n -> n) + arg.arg_mimetype + (match arg.arg_file with None -> "Memory" | Some n -> n) +;; + + +let encode = Netencoding.Url.encode ;; +let decode = Netencoding.Url.decode ;; + + + +let url_split_re = + Str.regexp "[&=]";; + + +let mk_url_encoded_parameters nv_pairs = + String.concat "&" + (List.map + (fun (name,value) -> + let name_encoded = Netencoding.Url.encode name in + let value_encoded = Netencoding.Url.encode value in + name_encoded ^ "=" ^ value_encoded + ) + nv_pairs + ) +;; + + +let dest_url_encoded_parameters parstr = + + let rec parse_after_amp tl = + match tl with + Str.Text name :: Str.Delim "=" :: Str.Text value :: tl' -> + (Netencoding.Url.decode name, + Netencoding.Url.decode value) :: parse_next tl' + | Str.Text name :: Str.Delim "=" :: Str.Delim "&" :: tl' -> + (Netencoding.Url.decode name, "") :: parse_after_amp tl' + | Str.Text name :: Str.Delim "=" :: [] -> + [Netencoding.Url.decode name, ""] + | _ -> + failwith "Cgi.dest_url_encoded_parameters" + and parse_next tl = + match tl with + [] -> [] + | Str.Delim "&" :: tl' -> + 
parse_after_amp tl' + | _ -> + failwith "Cgi.dest_url_encoded_parameters" + in + let toklist = Str.full_split url_split_re parstr in + match toklist with + [] -> [] + | _ -> parse_after_amp toklist +;; + + +let mk_form_encoded_parameters ntv_triples = + failwith "Cgi.mk_form_encoded_parameters: not implemented";; + + +let dest_parameter_header header options = + let get_name s = + (* s is: form-data; ... name="fieldname" ... + * Extract "fieldname" + *) + try + let tok, params = Mimestring.scan_value_with_parameters s options in + List.assoc "name" params + with + Not_found -> + failwith "Cgi.dest_form_encoded_parameters" + | Failure "Mimestring.scan_value_with_parameters" -> + failwith "Cgi.dest_form_encoded_parameters" + in + + let get_filename s = + (* s is: form-data; ... filename="fieldname" ... + * Extract "fieldname" + *) + try + let tok, params = Mimestring.scan_value_with_parameters s options in + Some(List.assoc "filename" params) + with + Not_found -> + None + | Failure "Mimestring.scan_value_with_parameters" -> + failwith "Cgi.dest_form_encoded_parameters" + in + + let mime_type = + try List.assoc "content-type" header + with Not_found -> "text/plain" in (* the default *) + + let content_disposition = + try List.assoc "content-disposition" header + with + Not_found -> + failwith "Cgi.dest_form_encoded_parameters: no content-disposition" + in + + let name = get_name content_disposition in + let filename = get_filename content_disposition in + + name, mime_type, filename +;; + + +let dest_form_encoded_parameters parstr ~boundary config = + let options = + if List.mem Work_around_backslash_bug config.workarounds then + [ Mimestring.No_backslash_escaping ] + else + [] + in + let parts = + Mimestring.scan_multipart_body_and_decode + parstr 0 (String.length parstr) boundary in + List.map + (fun (params, value) -> + + let name, mime_type, filename = dest_parameter_header params options in + { arg_name = name; + arg_processing = Memory; + arg_buf_value = 
Buffer.create 1; + arg_mem_value = Some value; + arg_disk_value = Weak.create 1; + arg_file = None; + arg_fd = None; + arg_mimetype = mime_type; + arg_filename = filename; + arg_header = params; + } + + ) + parts +;; + + +let make_temporary_file config = + (* Returns (filename, out_channel). *) + let rec try_creation n = + try + let fn = + Filename.concat + config.tmp_directory + (config.tmp_prefix ^ "-" ^ (string_of_int n)) + in + let fd = + open_out_gen + [ Open_wronly; Open_creat; Open_excl; Open_binary ] + 0o666 + fn + in + fn, fd + with + Sys_error m -> + (* This does not look very intelligent, but it is the only chance + * to limit the number of trials. + *) + if n > 1000 then + failwith ("Cgi: Cannot create temporary file: " ^ m); + try_creation (n+1) + in + try_creation 0 +;; + + +let dest_form_encoded_parameters_from_netstream s ~boundary config = + let parts = ref [] in + let options = + if List.mem Work_around_backslash_bug config.workarounds then + [ Mimestring.No_backslash_escaping ] + else + [] + in + + let create header = + (* CALLBACK for scan_multipart_body_from_netstream *) + let name, mime_type, filename = dest_parameter_header header options in + let p0 = + { arg_name = name; + arg_processing = Memory; + arg_buf_value = Buffer.create 80; + arg_mem_value = None; + arg_disk_value = Weak.create 1; + arg_file = None; + arg_fd = None; + arg_mimetype = mime_type; + arg_filename = filename; + arg_header = header; + } + in + let pr = config.how_to_process_arguments p0 in + let p = { p0 with arg_processing = pr } in + if pr = File then begin + let fn, fd = make_temporary_file config in + p.arg_file <- Some fn; + p.arg_fd <- Some fd; + p.arg_mem_value <- None; + end; + p + in + + let add p s k n = + (* CALLBACK for scan_multipart_body_from_netstream *) + if (p.arg_processing = Automatic) && + (Buffer.length (p.arg_buf_value) >= Netstream.block_size s) then begin + (* This is a LARGE argument *) + p.arg_processing <- File; + let fn, fd = 
make_temporary_file config in + p.arg_file <- Some fn; + p.arg_fd <- Some fd; + p.arg_mem_value <- None; + output_string fd (Buffer.contents p.arg_buf_value); + p.arg_buf_value <- Buffer.create 1; + end; + + match p.arg_processing with + (Memory|Automatic) -> + Buffer.add_substring + p.arg_buf_value + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + n + | File -> + let fd = match p.arg_fd with Some fd -> fd | None -> assert false in + output + fd + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + n; + in + + let stop p = + (* CALLBACK for scan_multipart_body_from_netstream *) + begin match p.arg_processing with + (Memory|Automatic) -> + p.arg_mem_value <- Some (Buffer.contents p.arg_buf_value); + p.arg_buf_value <- Buffer.create 1; + | File -> + let fd = match p.arg_fd with Some fd -> fd | None -> assert false in + close_out fd; + p.arg_mem_value <- None + end; + parts := p :: !parts + in + + Mimestring.scan_multipart_body_from_netstream + s + boundary + create + add + stop; + + List.rev !parts +;; + + +let getenv name = + try Sys.getenv name with Not_found -> "";; + +(* getenv: + * We use this getenv instead of Sys.getenv. The CGI specification does not + * say anything about what should happen if a certain environment variable + * is not set. + * Some servers initialize the environment variable to the empty string if + * it is not applicable, some servers do not set the variable at all. Because + * of this, unset variables are always reported as empty variables. + * + * This is especially a problem with QUERY_STRING. 
+ *) + +let mk_simple_arg ~name v = + { arg_name = name; + arg_processing = Memory; + arg_buf_value = Buffer.create 1; + arg_mem_value = Some v; + arg_disk_value = Weak.create 0; + arg_file = None; + arg_fd = None; + arg_mimetype = "text/plain"; + arg_filename = None; + arg_header = []; + } +;; + +let mk_memory_arg ~name ?(mime = "text/plain") ?filename ?(header = []) v = + { arg_name = name; + arg_processing = Memory; + arg_buf_value = Buffer.create 1; + arg_mem_value = Some v; + arg_disk_value = Weak.create 0; + arg_file = None; + arg_fd = None; + arg_mimetype = mime; + arg_filename = filename; + arg_header = header; + } +;; + +(* mk_file_arg: creates an argument whose value is stored in the file + * v_filename. A relative v_filename is made absolute by prepending + * Sys.getcwd(); the absolute name is what ends up in arg_file. + *) +let mk_file_arg + ~name ?(mime = "text/plain") ?filename ?(header = []) v_filename = + let v_abs_filename = + if Filename.is_relative v_filename then + Filename.concat (Sys.getcwd()) v_filename + else + v_filename + in + { arg_name = name; + arg_processing = File; + arg_buf_value = Buffer.create 1; + arg_mem_value = None; + arg_disk_value = Weak.create 1; + (* Length 1, not 0: because arg_mem_value is None here, arg_value + * reads and writes slot 0 of this weak array, and Weak.get/Weak.set + * raise Invalid_argument on an empty array. + *) + arg_file = Some v_abs_filename; + arg_fd = None; + arg_mimetype = mime; + arg_filename = filename; + arg_header = header; + } +;; + + +let get_content_type config = + (* Get the environment variable CONTENT_TYPE; if necessary apply + * workarounds for browser bugs. + *) + let content_type = getenv "CONTENT_TYPE" in + let user_agent = getenv "HTTP_USER_AGENT" in + let eff_content_type = + if Str.string_match (Str.regexp ".*MSIE") user_agent 0 && + List.mem Work_around_MSIE_Content_type_bug config.workarounds + then begin + (* Microsoft Internet Explorer: When used with SSL connections, + * this browser sometimes produces CONTENT_TYPEs like + * "multipart/form-data; boundary=..., multipart/form-data; boundary=..." + * Workaround: Throw away everything after ", ". 
+ *) + if Str.string_match (Str.regexp "\\([^,]*boundary[^,]*\\), .*boundary") + content_type 0 + then + Str.matched_group 1 content_type + else + content_type + end + else + content_type + in + eff_content_type +;; + + +let really_parse_args config = + let make_simple_arg (n,v) = mk_simple_arg n v in + + match getenv "REQUEST_METHOD" with + ("GET"|"HEAD") -> + List.map + make_simple_arg + (dest_url_encoded_parameters(getenv "QUERY_STRING")) + + | "POST" -> + let n = + try + int_of_string (getenv "CONTENT_LENGTH") + with + _ -> failwith "Cgi.parse_arguments" + in + if n > config.maximum_content_length then + raise Resources_exceeded; + begin + let mime_type, params = + Mimestring.scan_mime_type(get_content_type config) [] in + match mime_type with + "application/x-www-form-urlencoded" -> + let buf = String.create n in + really_input stdin buf 0 n; + List.map + make_simple_arg + (dest_url_encoded_parameters buf) + | "multipart/form-data" -> + let boundary = + try + List.assoc "boundary" params + with + Not_found -> + failwith "Cgi.parse_arguments" + in + (* -------------------------------------------------- DEBUG + let f = open_out "/tmp/cgiout" in + output_string f buf; + close_out f; + * -------------------------------------------------- + *) + dest_form_encoded_parameters_from_netstream + (Netstream.create_from_channel stdin (Some n) 4096) + boundary + config + | _ -> + failwith ("Cgi.parse_arguments: unknown content-type " ^ mime_type) + end + | _ -> + failwith "Cgi.parse_arguments: unknown method" + +let parsed = ref None;; (* protected by lock/unlock *) + +let lock = ref (fun () -> ());; +let unlock = ref (fun () -> ());; + +let init_mt new_lock new_unlock = + lock := new_lock; + unlock := new_unlock +;; + +let protect f = + !lock(); + try + let r = f() in + !unlock(); + r + with + x -> + !unlock(); + raise x +;; + +let parse_arguments config = + protect + (fun () -> + match !parsed with + Some _ -> () + | None -> + parsed := Some (List.map + (fun arg -> 
arg.arg_name, arg) + (really_parse_args config)) + ) +;; + +let arguments () = + protect + (fun () -> + match !parsed with + Some plist -> plist + | None -> + failwith "Cgi.arguments" + ) +;; + +let set_arguments arglist = + protect + (fun () -> + parsed := Some (List.map + (fun arg -> arg.arg_name, arg) + arglist) + ) +;; + +let default_config = + { maximum_content_length = max_int; + how_to_process_arguments = (fun _ -> Memory); + tmp_directory = "/var/tmp"; + tmp_prefix = "cgi-"; + workarounds = [ Work_around_MSIE_Content_type_bug; + Work_around_backslash_bug; + ] + } +;; + +(* arg_value: returns the value of the argument as a string. + * - If arg_mem_value holds a string, that string is returned. + * - Otherwise slot 0 of the weak array arg_disk_value is consulted; if it + * is empty, the file named by arg_file is read completely, cached in + * that slot, and returned. + * Raises Failure if there is neither a memory value nor a file, and + * Resources_exceeded if the file is longer than Sys.max_string_length + * (the behaviour documented for arg_value in cgi.mli). I/O errors from + * the channel functions are propagated after the channel is closed. + *) +let arg_value arg = + match arg.arg_mem_value with + None -> + begin + match Weak.get arg.arg_disk_value 0 with + None -> + begin + match arg.arg_file with + None -> + failwith "Cgi.arg_value: no value present" + | Some filename -> + let fd = open_in_bin filename in + try + let len = in_channel_length fd in + (* Check before allocating: String.create would otherwise + * fail with Invalid_argument instead of the documented + * exception. Raised inside the try so that fd is closed. + *) + if len > Sys.max_string_length then + raise Resources_exceeded; + let s = String.create len in + really_input fd s 0 len; + Weak.set arg.arg_disk_value 0 (Some s); + close_in fd; + s + with + any -> close_in fd; raise any + end + | Some v -> v + end + | Some s -> + s +;; + +let arg_name arg = arg.arg_name;; +let arg_file arg = arg.arg_file;; +let arg_mimetype arg = arg.arg_mimetype;; +let arg_filename arg = arg.arg_filename;; +let arg_header arg = arg.arg_header;; + +let cleanup () = + protect + (fun () -> + match !parsed with + None -> () + | Some plist -> + List.iter + (fun (name, arg) -> + match arg.arg_file with + None -> () + | Some filename -> + (* We do not complain if the file does not exist anymore. 
*) + if Sys.file_exists filename then + Sys.remove filename; + arg.arg_file <- None + ) + plist + ) +;; + +let argument name = List.assoc name (arguments());; +let argument_value name = arg_value (argument name);; + +module Operators = struct + let ( !% ) = argument + let ( !$ ) = argument_value +end;; + + +let parse_args() = + parse_arguments default_config; + List.map + (fun (name, arg) -> name, arg_value arg) + (arguments()) +;; + +let parse_args_with_mimetypes() = + parse_arguments default_config; + List.map + (fun (name, arg) -> name, arg_mimetype arg, arg_value arg) + (arguments()) +;; + +let header s = + let t = + match s with + "" -> "text/html" + | _ -> s + in + print_string ("Content-type: " ^ t ^ "\n\n"); + flush stdout +;; + + +let this_url() = + "http://" ^ (getenv "SERVER_NAME") ^ (getenv "SCRIPT_NAME") +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.7 2000/06/25 21:40:36 gerd + * Added printer. + * + * Revision 1.6 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.5 2000/05/16 22:29:36 gerd + * Added support for two common file upload bugs. + * + * Revision 1.4 2000/04/15 16:47:27 gerd + * Last minor changes before releasing 0.6. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. 
+ * + *) diff --git a/helm/DEVEL/pxp/netstring/cgi.mli b/helm/DEVEL/pxp/netstring/cgi.mli new file mode 100644 index 000000000..8aea499d8 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/cgi.mli @@ -0,0 +1,419 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(* FOR SIMPLE CGI PROGRAMS: + * + * If you do not need all the features of the API below, the following may + * be enough: + * + * - At the beginning of the main program, call 'parse_arguments' with + * either 'default_config' as argument or with a customized configuration. + * - Use 'argument_value(name)' to get the string value of the CGI parameter + * 'name'. If you like, you can also open the Cgi.Operators module and + * write '!$ name' instead. Here, !$ is a prefix operator equivalent to + * argument_value. + * + * If you do not change the default configuration, you do not need to + * worry about temporary files - there are not any. + * + * Most of the other functions defined below deal with file uploads, and + * are only useful for that. + *) + + +(**********************************************************************) +(* CGI functions *) +(**********************************************************************) + +(* First, the general interface to the CGI argument parser. *) + +exception Resources_exceeded + +type argument + +type argument_processing = + Memory (* Keep the value of the argument in memory *) + | File (* Store the value of the argument into a temporary file *) + | Automatic (* Store only large arguments into files. An argument + * value is large if it is longer than about one block (4K). + * This is not an exact definition. + *) + +type workaround = + Work_around_MSIE_Content_type_bug + (* There is a bug in MSIE I observed together with SSL connections. + * The CONTENT_TYPE passed to the server has sometimes the wrong + * format. This option enables a workaround if the user agent string + * contains the word "MSIE". 
+ *) + | Work_around_backslash_bug + (* There is a bug in many browsers: The backslash character is not + * handled as an escaping character in MIME headers. Because DOS- + * based systems use the backslash regularly in filenames, this bug + * matters. + * This option changes the interpretation of backslashes such that + * these are handled as normal characters. I do not know any browser + * that is not affected by this bug, so there is no check on + * the user agent string. + *) + + +type config = + { maximum_content_length : int; + (* The maximum CONTENT_LENGTH. Bigger requests trigger an + * Resources_exceeded exception. This feature can be used + * to detect primitive denial-of-service attacks. + *) + how_to_process_arguments : argument -> argument_processing; + (* After the beginning of an argument has been decoded, the + * type of processing is decided by invoking this function on + * the argument. Note that the passed argument is incomplete - + * it does not have a value. You can assume that name, filename, + * MIME type and the whole header are already known. + * - THIS CONFIGURATION PARAMETER ONLY AFFECTS ARGUMENTS + * "POST"ED FROM THE CLIENT IN FORM-ENCODED REPRESENTATION. + * All other transport methods can only handle the Memory + * processing type. + *) + tmp_directory : string; + (* The temporary directory to use for the temporary files. *) + tmp_prefix : string; + (* A prefix for temporary files. It is recommended that the prefix + * contains a part that is random or that depends on rapidly changing + * environment properties. For example, the process ID is a good + * candidate, or the current system time. It is not required that + * the prefix is unique; there is a fail-safe algorithm that + * computes a unique file name from the prefix, even if several + * CGI programs run concurrently. + *) + workarounds : workaround list; + (* Specifies which workarounds should be enabled. 
*) + } + +val parse_arguments : config -> unit +val arguments : unit -> (string * argument) list + (* - let () = parse_arguments config: + * Decodes the CGI arguments. 'config' specifies limits and processing + * hints; you can simply pass default_config (see below). + * + * - let arglist = arguments(): + * The function returns a list with (name, arg) pairs. The name is + * passed back as string while the value is returned as opaque type + * 'argument'. Below accessor functions are defined. These functions + * require that parse_arguments was invoked before. + * If neither 'parse_arguments' nor 'set_arguments' has been called, + * 'arguments' raises Failure "Cgi.arguments". + * + * Note 1: You can invoke 'parse_arguments' several times, but only + * the first time the arguments are read in. If you call the function + * again, it does nothing (even if the config changes). This is also + * true if 'parse_arguments' has been invoked after 'set_arguments'. + * + * Note 2: It is not guaranteed that stdin has been read until EOF. + * Only CONTENT_LENGTH bytes are read from stdin (following the CGI spec). + * + * Note 3: If arguments are processed in File or Automatic mode, the + * caller of 'parse_arguments' is responsible for deleting the files + * after use. You may consider applying the at_exit function of the + * core library for this purpose. See also 'cleanup' below. + *) + +val set_arguments : argument list -> unit + (* Alternatively, you can set the arguments to use. This overrides any + * previously parsed set of arguments, and also any following parsing. + * - Intended for debugging, and to make it possible to replace the + * CGI parser by a different one while retaining this API. 
+ *) + +val default_config : config + (* maximum_content_length = max_int + * how_to_process_arguments = "use always Memory" + * tmp_directory = "/var/tmp" + * tmp_prefix = "cgi-" + * workarounds = [ Work_around_MSIE_Content_type_bug; + * Work_around_backslash_bug; + * ] + * + * Note 1: On some Unixes, a special file system is used for /tmp that + * stores the files into the virtual memory (main memory or swap area). + * Because of this, /var/tmp is preferred as default. + * + * Note 2: Filename.temp_file is not used because it depends on + * environment variables which are usually not set in a CGI environment. + *) + +val arg_name : argument -> string +val arg_value : argument -> string +val arg_file : argument -> string option +val arg_mimetype : argument -> string +val arg_filename : argument -> string option +val arg_header : argument -> (string * string) list + (* The accessor functions that return several aspects of arguments. + * arg_name: returns the name of the argument + * arg_value: returns the value of the argument. If the value is stored + * in a temporary file, the contents of this file are returned, i.e. + * the file is loaded. This may have some consequences: + * (1) The function may fail because of I/O errors. + * (2) The function may be very slow, especially if the file is + * non-local. + * (3) If the value is bigger than Sys.max_string_length, the function + * raises the exception Resources_exceeded. On 32 bit architectures, + * strings are limited to 16 MB. + * Note that loaded values are put into weak arrays. This makes it + * possible that subsequent calls of 'arg_value' on the same argument + * can avoid loading the value again, and that unused values will + * nevertheless be collected by the GC. + * arg_file: returns 'Some filename' if the value resides in a temporary + * file, and 'filename' is the absolute path of this file. If the + * value is only available in memory, None is returned. 
+ * arg_mimetype: returns the MIME type of the argument. Note that the + * default MIME type is "text/plain", and that the default is returned + * if the MIME type is not available. + * arg_filename: returns 'Some filename' if the argument is associated + * with a certain filename (e.g. from a file upload); otherwise None + * arg_header: returns pairs (name,value) containing the complete header + * of the argument. If the transmission protocol does not specify + * a header, the empty list is passed back. + *) + +val mk_simple_arg : name:string -> string -> argument + (* mk_simple_arg name value: + * Creates a simple argument with only name, and a value passed by string. + * The MIME type is "text/plain". + *) + +val mk_memory_arg + : name:string -> ?mime:string -> ?filename:string -> + ?header:((string * string) list) -> string -> argument + (* mk_memory_arg name mimetype filename header value: + * Creates an argument whose value is kept in memory. + * + * Note: The signature of this function changed in release 0.8. + *) + +val mk_file_arg + : name:string -> ?mime:string -> ?filename:string -> + ?header:((string * string) list) -> string -> argument + (* mk_file_arg name mimetype filename header value_filename: + * Creates an argument whose value is stored in the file + * 'value_filename'. If this file name is not absolute, it is interpreted + * relative to the directory returned by Sys.getcwd() - this might not + * be what you want with respect to mount points and symlinks (and it + * depends on the operating system as getcwd is only POSIX.1). The + * file name is turned into an absolute name immediately, and the + * function arg_file returns the rewritten name. + * + * Note: The signature of this function changed in release 0.8. + *) + + +val cleanup : unit -> unit + (* Removes all temporary files that occur in the current set of arguments + * (as returned by 'arguments()'). 
+ *) + + +(* Convenience functions: *) + +val argument : string -> argument + (* let argument name = List.assoc name (arguments()) -- i.e. returns + * the argument with the passed name. Of course, this function expects + * that 'parse_arguments' was called before. + *) + +val argument_value : string -> string + (* let argument_value name = arg_value(argument name) -- i.e. returns + * the value of the argument. + * See also Operators.( !$ ) below. + *) + +(* For toploop printers: *) + +val print_argument : argument -> unit + + +(* Now, the compatibility functions. *) + +val parse_args : unit -> (string * string) list + (* Decodes the arguments of the CGI and returns them as an association list + * Works whatever the method is (GET or POST) + *) + +val parse_args_with_mimetypes : unit -> (string * string * string) list + (* Like parse_args, but returns also the MIME type. + * The triples contain (name, mime_type, value). + * If an encoding was chosen that does not transfer the MIME type, + * "text/plain" is returned. + * + * THIS FUNCTION SHOULD BE CONSIDERED AS DEPRECATED. + * It was included in netstring-0.4, but most people want not only + * the MIME type. parse_arguments should be used instead. + *) + +val header : string -> unit + (* Prints the content-type header. 
+ * the argument is the MIME type (default value is "text/html" if the + * argument is the empty string) + *) + +val this_url : unit -> string + (* Returns the address of the CGI *) + +(**********************************************************************) +(* The Operators module *) +(**********************************************************************) + +(* If you open the Operators module, you can write + * !% "name" instead of argument "name", and + * !$ "name" instead of argument_value "name" + *) + +module Operators : sig + val ( !% ) : string -> argument + (* same as 'argument' above *) + val ( !$ ) : string -> string + (* same as 'argument_value' above *) +end + +(**********************************************************************) +(* Low-level functions *) +(**********************************************************************) + +(* Encoding/Decoding within URLs: + * + * The following two functions perform the '%'-substitution for + * characters that may otherwise be interpreted as metacharacters. + * + * See also the Netencoding module. This interface contains these functions + * to keep the compatibility with the old Cgi module. + *) + +val decode : string -> string +val encode : string -> string + +(* URL-encoded parameters: + * + * The following two functions create and analyze URL-encoded parameters. + * Format: name1=val1&name2=val2&... + *) + +val mk_url_encoded_parameters : (string * string) list -> string + (* The argument is a list of (name,value) pairs. The result is the + * single URL-encoded parameter string. + *) + +val dest_url_encoded_parameters : string -> (string * string) list + (* The argument is the URL-encoded parameter string. The result is + * the corresponding list of (name,value) pairs. + * Note: Whitespace within the parameter string is ignored. + * If there is a format error, the function fails. 
+ *) + +(* Form-encoded parameters: + * + * According to: RFCs 2388, 2183, 2045, 2046 + * + * General note: This is a simple API to encode/decode form-encoded parameters. + * Especially, it is not possible to pass the header of the parts through + * this API. + *) + +val mk_form_encoded_parameters : (string * string * string) list -> + (string * string) + (* The argument is a list of (name,mimetype,value) triples. + * The result is (parstr, boundary), where 'parstr' is the + * single form-encoded parameter string, and 'boundary' is the + * boundary to separate the message parts. + * + * THIS FUNCTION IS CURRENTLY NOT IMPLEMENTED! + *) + +val dest_form_encoded_parameters : string -> boundary:string -> config -> + argument list + (* The first argument is the form-encoded parameter string. + * The second argument is the boundary (extracted from the mime type). + * Third argument: Only the workarounds component is used. + * The result is + * the corresponding list of arguments (all in memory). + * If there is a format error, the function fails. + * Note: embedded multipart/mixed types are returned as they are, + * and are not recursively decoded. + * Note: The content-transfer-encodings "7bit", "8bit", "binary", + * "base64", and "quoted-printable" are supported. + * Note: Parameter names which include spaces or non-alphanumeric + * characters may be problematic (the rules of RFC 2047 are NOT applied). + * Note: The returned MIME type is not normalized. + *) + +val dest_form_encoded_parameters_from_netstream + : Netstream.t -> boundary:string -> config -> argument list + (* let arglist = dest_form_encoded_parameters_from_netstream s b c: + * Reads the form-encoded parameters from netstream s. The boundary + * is passed in b, and the configuration in c. + * A list of arguments is returned. + * + * See also dest_form_encoded_parameters. 
+ * + * Restriction: In contrast to dest_form_encoded_parameters, this + * function is not able to handle the content-transfer-encodings + * "base64" and "quoted-printable". (This is not really a restriction + * because no browser uses these encodings in conjunction with HTTP. + * This is different if mail transport is chosen. - The reason for + * this restriction is that there are currently no stream functions + * for decoding.) + *) + +(* Private functions: *) + +val init_mt : (unit -> unit) -> (unit -> unit) -> unit + + +(**********************************************************************) +(* Compatibility with CGI library by J.-C. Filliatre *) +(**********************************************************************) + +(* The following functions are compatible with J.-C. Filliatre's CGI + * library: + * + * parse_args, header, this_url, decode, encode. + * + * Note that the new implementation of parse_args can be safely invoked + * several times. + * + * Since release 0.8, Netstring's CGI implementation is again thread-safe. + *) + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.7 2000/06/25 21:40:36 gerd + * Added printer. + * + * Revision 1.6 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.5 2000/05/16 22:28:13 gerd + * New "workarounds" config component. + * + * Revision 1.4 2000/04/15 16:47:27 gerd + * Last minor changes before releasing 0.6. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/compat-base64/META b/helm/DEVEL/pxp/netstring/compat-base64/META new file mode 100644 index 000000000..a5c003ea4 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/compat-base64/META @@ -0,0 +1,3 @@ +version = "0.5" +requires = "netstring" +description = "Compatibility with base64" diff --git a/helm/DEVEL/pxp/netstring/compat-cgi/META b/helm/DEVEL/pxp/netstring/compat-cgi/META new file mode 100644 index 000000000..2294921a0 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/compat-cgi/META @@ -0,0 +1,3 @@ +version = "0.5" +requires = "netstring" +description = "Compatibility with cgi" diff --git a/helm/DEVEL/pxp/netstring/depend b/helm/DEVEL/pxp/netstring/depend new file mode 100644 index 000000000..5991264c6 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/depend @@ -0,0 +1,36 @@ +base64.cmo: netencoding.cmi base64.cmi +base64.cmx: netencoding.cmx base64.cmi +cgi.cmo: mimestring.cmi netbuffer.cmi netencoding.cmi netstream.cmi cgi.cmi +cgi.cmx: mimestring.cmx netbuffer.cmx netencoding.cmx netstream.cmx cgi.cmi +mimestring.cmo: netbuffer.cmi netencoding.cmi netstream.cmi netstring_str.cmi \ + mimestring.cmi +mimestring.cmx: netbuffer.cmx netencoding.cmx netstream.cmx netstring_str.cmx \ + mimestring.cmi +netbuffer.cmo: netbuffer.cmi +netbuffer.cmx: netbuffer.cmi +netconversion.cmo: netmappings.cmi netconversion.cmi +netconversion.cmx: netmappings.cmx netconversion.cmi +netencoding.cmo: netstring_str.cmi netencoding.cmi +netencoding.cmx: netstring_str.cmx netencoding.cmi +nethtml.cmo: nethtml.cmi +nethtml.cmx: nethtml.cmi +netmappings.cmo: netmappings.cmi +netmappings.cmx: netmappings.cmi +netmappings_iso.cmo: netmappings.cmi +netmappings_iso.cmx: netmappings.cmx +netmappings_other.cmo: netmappings.cmi +netmappings_other.cmx: netmappings.cmx +netstream.cmo: netbuffer.cmi netstream.cmi +netstream.cmx: netbuffer.cmx netstream.cmi +netstring_mt.cmo: cgi.cmi netmappings.cmi netstring_str.cmi netstring_mt.cmi +netstring_mt.cmx: cgi.cmx 
netmappings.cmx netstring_str.cmx netstring_mt.cmi +netstring_str.cmo: netstring_str.cmi +netstring_str.cmx: netstring_str.cmi +netstring_top.cmo: netstring_top.cmi +netstring_top.cmx: netstring_top.cmi +neturl.cmo: netencoding.cmi neturl.cmi +neturl.cmx: netencoding.cmx neturl.cmi +cgi.cmi: netstream.cmi +mimestring.cmi: netstream.cmi +netmappings.cmi: netconversion.cmi +netstream.cmi: netbuffer.cmi diff --git a/helm/DEVEL/pxp/netstring/depend.pkg b/helm/DEVEL/pxp/netstring/depend.pkg new file mode 100644 index 000000000..e69de29bb diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB new file mode 100644 index 000000000..d942e2786 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB @@ -0,0 +1,52 @@ +****************************************************************************** +ABOUT-FINDLIB - Package manager for O'Caml +****************************************************************************** + + +============================================================================== +Abstract +============================================================================== + +The findlib library provides a scheme to manage reusable software components +(packages), and includes tools that support this scheme. Packages are +collections of OCaml modules for which metainformation can be stored. The +packages are kept in the filesystem hierarchy, but with strict directory +structure. The library contains functions to look the directory up that stores +a package, to query metainformation about a package, and to retrieve dependency +information about multiple packages. There is also a tool that allows the user +to enter queries on the command-line. In order to simplify compilation and +linkage, there are new frontends of the various OCaml compilers that can +directly deal with packages. + +Together with the packages metainformation is stored. 
This includes a version +string, the archives the package consists of, and additional linker options. +Packages can also be dependent on other packages. There is a query which finds +out all predecessors of a list of packages and sorts them topologically. The +new compiler frontends do this implicitly. + +Metainformation can be conditional, i.e. depend on a set of predicates. This is +mainly used to be able to react on certain properties of the environment, such +as if the bytecode or the native compiler is invoked, if the application is +multi-threaded, and a few more. If the new compiler frontends are used, most +predicates are found out automatically. + +There is special support for scripts. A new directive, "#require", loads +packages into scripts. Of course, this works only with newly created toploops +which include the findlib library. + +============================================================================== +Where to get findlib +============================================================================== + +The manual of findlib is available online [1]. You can download findlib here +[2]. + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/documentation/findlib/ + +[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz + + + diff --git a/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml new file mode 100644 index 000000000..d1dc5b04e --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/ABOUT-FINDLIB.xml @@ -0,0 +1,61 @@ + + +%common; + +findlib"> +Findlib"> + +]> + + + + Abstract +

+The &f; library provides a scheme to manage reusable software +components (packages), and includes tools that support this +scheme. Packages are collections of OCaml modules for which +metainformation can be stored. The packages are kept in the filesystem +hierarchy, but with a strict directory structure. The library contains +functions to look up the directory that stores a package, to query +metainformation about a package, and to retrieve dependency +information about multiple packages. There is also a tool that allows +the user to enter queries on the command-line. In order to simplify +compilation and linkage, there are new frontends of the various OCaml +compilers that can directly deal with packages. +

+ +

+Together with the packages metainformation is stored. This includes a +version string, the archives the package consists of, and additional +linker options. Packages can also be dependent on other +packages. There is a query which finds out all predecessors of a list +of packages and sorts them topologically. The new compiler frontends +do this implicitly. +

+ +

+Metainformation can be conditional, i.e. depend on a set of +predicates. This is mainly used to be able to react to certain +properties of the environment, such as whether the bytecode or the native +compiler is invoked, whether the application is multi-threaded, and a few +more. If the new compiler frontends are used, most predicates are +determined automatically. +

+ +

+There is special support for scripts. A new directive, "#require", +loads packages into scripts. Of course, this works only with newly +created toploops which include the &f; library. +

+ +
+ + Where to get findlib +

+The manual of &f; is available online. +You can download &f; here. +

+
+
diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL b/helm/DEVEL/pxp/netstring/doc/INSTALL new file mode 100644 index 000000000..cca39944b --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/INSTALL @@ -0,0 +1,128 @@ +****************************************************************************** +INSTALL - Netstring, string processing functions for the net +****************************************************************************** + + +============================================================================== +The "Netstring" package +============================================================================== + +------------------------------------------------------------------------------ +Prerequisites +------------------------------------------------------------------------------ + +Netstring does not need any other packages besides the O'Caml core. Netstring +needs at least O'Caml 3.00. The installation procedure defined in the Makefile +requires findlib [1] to work [2]. + +------------------------------------------------------------------------------ +Configuration +------------------------------------------------------------------------------ + +It is not necessary to configure "Netstring". + +------------------------------------------------------------------------------ +Compilation +------------------------------------------------------------------------------ + +The Makefile defines the following goals: + +- make all + compiles with the bytecode compiler and creates netstring.cma, + netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and + netmappings_other.cmo + +- make opt + compiles with the native compiler and creates netstring.cmxa, + netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx + +The archive netstring.cmx?a contains the functionality, and the two +single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add +configurations to the character set conversion module. 
These configurations are +optional: + +- Netmappings_iso: Conversion tables for the character sets ISO-8859-2, -3, + -4, -5, -6, -7, -8, -9, -10, 13, -14, and -15. + +- Netmappings_other: Conversion tables for the character sets WINDOWS-1250, + -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; code pages 037, 424, + 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, 862, 863, 864, 865, + 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh Roman encoding; + Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats Encodings. + +Even without these configuration files, the conversion module is able to handle +the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant of +UTF-8. + +The module Netstring_mt must be linked into multi-threaded applications; +otherwise some mutexes remain uninitialized. + +The module Netstring_top loads several printers for abstract values (for +toploops). + +------------------------------------------------------------------------------ +Installation +------------------------------------------------------------------------------ + +The Makefile defines the following goals: + +- make install + installs the bytecode archive, the interface definitions, and if present, + the native archive in the default location of findlib + +- make install-cgi + Installs a pseudo package "cgi" which is compatible with the old cgi + package. This has the effect that software searching the "cgi" package will + find the netstring package instead. This is recommended. + +- make install-base64 + Installs a pseudo package "base64" which is compatible with the old base64 + package. This has the effect that software searching the "base64" package + will find the netstring package instead. This is recommended. 
+ +- make uninstall + removes the package + +- make uninstall-cgi + removes the "cgi" compatibility package + +- make uninstall-base64 + removes the "base64" compatibility package + +------------------------------------------------------------------------------ +Linking netstring with findlib +------------------------------------------------------------------------------ + +The command + +ocamlfind ocamlc ... -package netstring ... -linkpkg ... + +links as much as possible code from netstring into your application: All +conversion tables; when -thread is specified, the initialization code for +multi-threaded programs; when a toploop is created, the code setting the value +printers. + +The following predicates reduce the amount of linked code: + +- netstring_only_iso: Only the conversion tables for the ISO-8859 series of + character sets are linked. + +- netstring_minimum: No additional conversion tables are linked; only + ISO-8859-1 and the UTF encodings work. + +For example, the command may look like + +ocamlfind ocamlc ... + -package netstring -predicates netstring_only_iso ... -linkpkg ... + +to link only the ISO-8859 conversion tables. + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/documentation/findlib/ + +[2] Findlib is a package manager, see the file ABOUT-FINDLIB. + + + diff --git a/helm/DEVEL/pxp/netstring/doc/INSTALL.xml b/helm/DEVEL/pxp/netstring/doc/INSTALL.xml new file mode 100644 index 000000000..b5b53eddc --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/INSTALL.xml @@ -0,0 +1,153 @@ + + +%common; + +Netstring"> + +]> + + + The "Netstring" package + Prerequisites +

+&m; does not need any other packages besides the O'Caml core. &m; needs +at least O'Caml 3.00. The installation procedure defined in the Makefile +requires findlib to +work (Findlib is a package manager; see the file +ABOUT-FINDLIB). +

+
+ + Configuration +

+It is not necessary to configure "Netstring". +

+
+ + Compilation +

+The Makefile defines the following goals: +

+
    +
  • +

    make all

    +

    compiles with the bytecode compiler and creates netstring.cma, +netstring_mt.cmo, netstring_top.cmo, netmappings_iso.cmo, and +netmappings_other.cmo

    +
  • +
  • +

    make opt

    +

    compiles with the native compiler and creates netstring.cmxa, +netstring_mt.cmx, netmappings_iso.cmx, and netmappings_other.cmx

    +
  • +
+ +

The archive netstring.cmx?a contains the functionality, and the two +single-module files netmappings_iso.cm[ox] and netmappings_other.cm[ox] add +configurations to the character set conversion module. These configurations are +optional:

+ +
    +
  • Netmappings_iso: Conversion tables for the character sets +ISO-8859-2, -3, -4, -5, -6, -7, -8, -9, -10, -13, -14, and -15.

    +
  • +
  • Netmappings_other: Conversion tables for the character sets +WINDOWS-1250, -1251, -1252, -1253, -1254, -1255, -1256, -1257, -1258; +code pages 037, 424, 437, 500, 737, 775, 850, 852, 855, 856, 857, 860, 861, +862, 863, 864, 865, 866, 869, 874, 875, 1006, 1026; JIS-0201; KOI8R; Macintosh +Roman encoding; Adobe Standard Encoding, Symbol Encoding, and Zapf Dingbats +Encodings.

    +
  • +
+ +

Even without these configuration files, the conversion module is able to +handle the encodings ISO-8859-1, US-ASCII, UTF-16, UTF-8, and the Java variant +of UTF-8.

+ +

The module Netstring_mt must be linked into multi-threaded applications; +otherwise some mutexes remain uninitialized.

+ +

The module Netstring_top loads several printers for abstract values (for +toploops).

+ +
+ + Installation +

+The Makefile defines the following goals:

+
    +
  • +

    make install

    +

    installs the bytecode archive, the interface definitions, and if +present, the native archive in the default location of findlib +

    +
  • + +
  • +

    make install-cgi

    +

    Installs a pseudo package "cgi" which is compatible with the old +cgi package. This has the effect that software searching the "cgi" package will +find the netstring package instead. This is recommended.

    +
  • + +
  • +

    make install-base64

    Installs a pseudo package "base64" +which is compatible with the old base64 package. This has the effect that +software searching the "base64" package will find the netstring package +instead. This is recommended.

    +
  • + +
  • +

    make uninstall

    +

    removes the package

    +
  • + +
  • +

    make uninstall-cgi

    +

    removes the "cgi" compatibility package

    +
  • + +
  • +

    make uninstall-base64

    +

    removes the "base64" compatibility package

    +
  • +
+
+ + + + Linking netstring with findlib +

The command + +ocamlfind ocamlc ... -package netstring ... -linkpkg ... + +links as much code as possible from netstring into your application: All +conversion tables; when -thread is specified, the initialization code for +multi-threaded programs; when a toploop is created, the code setting the value +printers.

+ +

The following predicates reduce the amount of linked code:

+ +
    +
  • netstring_only_iso: Only the conversion tables for the ISO-8859 +series of character sets are linked.

    +
  • +
  • netstring_minimum: No additional conversion tables are linked; +only ISO-8859-1 and the UTF encodings work.

    +
  • +
+ +

For example, the command may look like + + +ocamlfind ocamlc ... + -package netstring -predicates netstring_only_iso ... -linkpkg ... + + +to link only the ISO-8859 conversion tables.

+
+ +
+
\ No newline at end of file diff --git a/helm/DEVEL/pxp/netstring/doc/Makefile b/helm/DEVEL/pxp/netstring/doc/Makefile new file mode 100644 index 000000000..7f8450be3 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/Makefile @@ -0,0 +1,22 @@ +.PHONY: all +all: README INSTALL ABOUT-FINDLIB + +README: README.xml common.xml config.xml + readme -text README.xml >README + +INSTALL: INSTALL.xml common.xml config.xml + readme -text INSTALL.xml >INSTALL + +ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml + readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB + +.PHONY: clean +clean: + +.PHONY: CLEAN +CLEAN: clean + +.PHONY: distclean +distclean: clean + rm -f *~ + diff --git a/helm/DEVEL/pxp/netstring/doc/README b/helm/DEVEL/pxp/netstring/doc/README new file mode 100644 index 000000000..b590416be --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/README @@ -0,0 +1,212 @@ +****************************************************************************** +README - Netstring, string processing functions for the net +****************************************************************************** + + +============================================================================== +Abstract +============================================================================== + +Netstring is a collection of string processing functions that are useful in +conjunction with Internet messages and protocols. In particular, it contains +functions for the following purposes: + +- Parsing MIME messages + +- Several encoding/decoding functions (Base 64, Quoted Printable, Q, + URL-encoding) + +- A new implementation of the CGI interface that allows users to upload files + +- A simple HTML parser + +- URL parsing, printing and processing + +- Conversion between character sets + +============================================================================== +Download +============================================================================== + +You can download Netstring as gzip'ed tarball [1]. 
+ +============================================================================== +Documentation +============================================================================== + +Sorry, there is no manual. The mli files describe each function in detail. +Furthermore, the following additional information may be useful. + +------------------------------------------------------------------------------ +New CGI implementation +------------------------------------------------------------------------------ + +For a long time, the CGI implementation by Jean-Christophe Filliatre has been +the only freely available module that implemented the CGI interface (it is also +based on code by Daniel de Rauglaudre). It worked well, but it did not support +file uploads because this requires a parser for MIME messages. + +The main goal of Netstring is to realize such uploads, and because of this it +contains an almost complete parser for MIME messages. + +The new CGI implementation provides the same functions as the old one, and +some extensions. If you call Cgi.parse_args(), you get the CGI parameters as +before, but as already explained this works also if the parameters are +encapsulated as MIME message. In the HTML code, you can select the MIME format +by using + +
+... +
+ + +- this "enctype" attribute forces the browser to send the form parameters as +multipart MIME message (Note: You can neither send the parameters of a +conventional hyperlink as MIME message nor the form parameters if the "method" +is "get"). In many browsers only this particular encoding enables the file +upload elements, you cannot perform file uploads with other encodings. + +As MIME messages can transport MIME types, filename, and other additional +properties, it is also possible to get these using the enhanced interface. +After calling + +Cgi.parse_arguments config + +you can get all available information about a certain parameter by invoking + +let param = Cgi.argument "name" + +- where "param" has the type "argument". There are several accessor functions +to extract the various aspects of arguments (name, filename, value by string, +value by temporary file, MIME type, MIME header) from "argument" values. + +------------------------------------------------------------------------------ +Base64, and other encodings +------------------------------------------------------------------------------ + +Netstring is also the successor of the Base64 package. It provides a Base64 +compatible interface, and an enhanced API. The latter is contained in the +Netencoding module which also offers implementations of the "quoted printable", +"Q", and "URL" encodings. Please see netencoding.mli for details. + +------------------------------------------------------------------------------ +The MIME scanner functions +------------------------------------------------------------------------------ + +In the Mimestring module you can find several functions scanning parts of MIME +messages. These functions already cover most aspects of MIME messages: Scanning +of headers, analysis of structured header entries, and scanning of multipart +bodies. 
Of course, a full-featured MIME scanner would require some more +functions, especially concrete parsers for frequent structures (mail addresses +or date strings). + +Please see the file mimestring.mli for details. + +------------------------------------------------------------------------------ +The HTML parser +------------------------------------------------------------------------------ + +The HTML parser should be able to read every HTML file; whether it is correct +or not. The parser tries to recover from parsing errors as much as possible. + +The parser returns the HTML term as conventional recursive value (i.e. no +object-oriented design). + +The parser depends a bit on knowledge about the HTML version; mainly because it +needs to know the tags that are always empty. It may be necessary that you must +adjust this configuration before the parser works well enough for your purpose. + +Please see the Nethtml module for details. + +------------------------------------------------------------------------------ +The abstract data type URL +------------------------------------------------------------------------------ + +The module Neturl contains support for URL parsing and processing. The +implementation follows strictly the standards RFC 1738 and RFC 1808. URLs can +be parsed, and several accessor functions allow the user to get components of +parsed URLs, or to change components. Modifying URLs is safe; it is impossible +to create a URL that does not have a valid string representation. + +Both absolute and relative URLs are supported. It is possible to apply a +relative URL to a base URL in order to get the corresponding absolute URL. + +------------------------------------------------------------------------------ +Conversion between character sets and encodings +------------------------------------------------------------------------------ + +The module Netconversion converts strings from one characters set to another. 
+It is Unicode-based, and there are conversion tables for more than 50 +encodings. + +============================================================================== +Author, Copying +============================================================================== + +Netstring has been written by Gerd Stolpmann [2]. You may copy it as you like, +you may use it even for commercial purposes as long as the license conditions +are respected, see the file LICENSE coming with the distribution. It allows +almost everything. + +============================================================================== +History +============================================================================== + +- Changed in 0.9.3: Fixed a bug in the "install" rule of the Makefile. + +- Changed in 0.9.2: New format for the conversion tables which are now much + smaller. + +- Changed in 0.9.1: Updated the Makefile such that (native-code) compilation + of netmappings.ml becomes possible. + +- Changed in 0.9: Extended Mimestring module: It can now process RFC-2047 + messages. + New Netconversion module which converts strings between character encodings. + +- Changed in 0.8.1: Added the component url_accepts_8bits to + Neturl.url_syntax. This helps processing URLs which intentionally contain + bytes >= 0x80. + Fixed a bug: Every URL containing a 'j' was malformed! + +- Changed in 0.8: Added the module Neturl which provides the abstract data + types of URLs. + The whole package is now thread-safe. + Added printers for the various opaque data types. + Added labels to function arguments where appropriate. The following + functions changed their signatures significantly: Cgi.mk_memory_arg, + Cgi.mk_file_arg. + +- Changed in 0.7: Added workarounds for frequent browser bugs. Some functions + take now an additional argument specifying which workarounds are enabled. + +- Changed in 0.6.1: Updated URLs in documentation. 
+ +- Changed in 0.6: The file upload has been re-implemented to support large + files; the file is now read block by block and the blocks can be collected + either in memory or in a temporary file. + Furthermore, the CGI API has been revised. There is now an opaque data type + "argument" that hides all implementation details and that is extensible (if + necessary, it is possible to add features without breaking the interface + again). + The CGI argument parser can be configured; currently it is possible to limit + the size of uploaded data, to control by which method arguments are + processed, and to set up where temporary files are created. + The other parts of the package that have nothing to do with CGI remain + unchanged. + +- Changed in 0.5.1: A mistake in the documentation has been corrected. + +- Initial version 0.5: The Netstring package wants to be the successor of the + Base64-0.2 and the Cgi-0.3 packages. The sum of both numbers is 0.5, and + because of this, the first version number is 0.5. + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/netstring-0.9.2.tar.gz + +[2] see mailto:gerd@gerd-stolpmann.de + + + diff --git a/helm/DEVEL/pxp/netstring/doc/README.xml b/helm/DEVEL/pxp/netstring/doc/README.xml new file mode 100644 index 000000000..bbf473e99 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/doc/README.xml @@ -0,0 +1,244 @@ + + +%common; + + +up'> + + +%config; + +]> + + + + Abstract +

+Netstring is a collection of string processing functions that are +useful in conjunction with Internet messages and protocols. In particular, +it contains functions for the following purposes:

+ +
    +
  • Parsing MIME messages

    +
  • +
  • Several encoding/decoding functions (Base 64, Quoted Printable, Q, URL-encoding)

    +
  • +
  • A new implementation of the CGI interface that allows users to upload files

    +
  • +
  • A simple HTML parser

    +
  • +
  • URL parsing, printing and processing

    +
  • +
  • Conversion between character sets

    +
  • +
+ +
+ + + Download +

+You can download Netstring as a gzip'ed tarball. +

+ +
+ + + Documentation +

+Sorry, there is no manual. The mli files describe each function in +detail. Furthermore, the following additional information may be useful.

+ + + New CGI implementation + +

For a long time, the CGI implementation by Jean-Christophe Filliatre +has been the only freely available module that implemented the CGI interface +(it is also based on code by Daniel de Rauglaudre). It worked well, but it did not +support file uploads because this requires a parser for MIME messages.

+

The main goal of Netstring is to realize such uploads, and because of +this it contains an almost complete parser for MIME messages.

+

The new CGI implementation provides the same functions as the old +one, and some extensions. If you call Cgi.parse_args(), you get the CGI +parameters as before, but as already explained this works also if the +parameters are encapsulated as a MIME message. In the HTML code, you can select +the MIME format by using + +<form action="..." method="post" enctype="multipart/form-data"> +... +</form> + +- this "enctype" attribute forces the browser to send the form parameters +as a multipart MIME message (Note: You can neither send the parameters of a +conventional hyperlink as a MIME message nor the form parameters if the +"method" is "get"). In many browsers only this particular encoding enables +the file upload elements; you cannot perform file uploads with other encodings. +

+ +

As MIME messages can transport MIME types, filename, and other +additional properties, it is also possible to get these using the enhanced +interface. After calling + +Cgi.parse_arguments config + +you can get all available information about a certain parameter by invoking + +let param = Cgi.argument "name" + +- where "param" has the type "argument". There are several accessor functions +to extract the various aspects of arguments (name, filename, value by string, +value by temporary file, MIME type, MIME header) from "argument" values. +

+ +
+ + + + Base64, and other encodings + +

Netstring is also the successor of the Base64 package. It provides a +Base64 compatible interface, and an enhanced API. The latter is contained in +the Netencoding module which also offers implementations of the "quoted +printable", "Q", and "URL" encodings. Please see netencoding.mli for +details.

+ +
+ + + + The MIME scanner functions + +

In the Mimestring module you can find several functions scanning parts +of MIME messages. These functions already cover most aspects of MIME messages: +Scanning of headers, analysis of structured header entries, and scanning of +multipart bodies. Of course, a full-featured MIME scanner would require some +more functions, especially concrete parsers for frequent structures +(mail addresses or date strings). +

+

Please see the file mimestring.mli for details.

+
+ + + + The HTML parser + +

The HTML parser should be able to read every HTML file, whether it is +correct or not. The parser tries to recover from parsing errors as much as +possible. +

+

The parser returns the HTML term as a conventional recursive value +(i.e. no object-oriented design).

+

The parser depends a bit on knowledge about the HTML version, mainly +because it needs to know the tags that are always empty. You may need to +adjust this configuration before the parser works well enough for +your purpose. +

+

Please see the Nethtml module for details.

+
+ + + The abstract data type URL +

The module Neturl contains support for URL parsing and processing. +The implementation strictly follows the standards RFC 1738 and RFC 1808. +URLs can be parsed, and several accessor functions allow the user to +get components of parsed URLs, or to change components. Modifying URLs +is safe; it is impossible to create a URL that does not have a valid +string representation.

+ +

Both absolute and relative URLs are supported. It is possible to +apply a relative URL to a base URL in order to get the corresponding +absolute URL.

+
+ + + Conversion between character sets and encodings +

The module Netconversion converts strings from one character set +to another. It is Unicode-based, and there are conversion tables for more than +50 encodings.

+
+ +
+ + + Author, Copying +

+Netstring has been written by &person.gps;. You may copy it as you like, +you may use it even for commercial purposes as long as the license conditions +are respected, see the file LICENSE coming with the distribution. It allows +almost everything. +

+
+ + + History + +
    +
  • Changed in 0.9.3: Fixed a bug in the "install" rule of +the Makefile.

    +
  • +
  • Changed in 0.9.2: New format for the conversion tables +which are now much smaller.

    +
  • +
  • Changed in 0.9.1: Updated the Makefile such that +(native-code) compilation of netmappings.ml becomes possible. +

    +
  • +
  • Changed in 0.9: Extended Mimestring module: It can +now process RFC-2047 messages.

    +

    New Netconversion module which converts strings between character +encodings.

    +
  • +
  • Changed in 0.8.1: Added the component +url_accepts_8bits to Neturl.url_syntax. This helps processing URLs which +intentionally contain bytes >= 0x80.

    +

    Fixed a bug: Every URL containing a 'j' was malformed!

    +
  • +
  • Changed in 0.8: Added the module Neturl which +provides the abstract data types of URLs.

    +

    The whole package is now thread-safe.

    +

    Added printers for the various opaque data types.

    +

    Added labels to function arguments where appropriate. The +following functions changed their signatures significantly: +Cgi.mk_memory_arg, Cgi.mk_file_arg.

    +
  • +
  • Changed in 0.7: Added workarounds for frequent +browser bugs. Some functions now take an additional argument +specifying which workarounds are enabled.

    +
  • +
  • Changed in 0.6.1: Updated URLs in documentation.

    +
  • + +
  • Changed in 0.6: The file upload has been re-implemented +to support large files; the file is now read block by block and the blocks can +be collected either in memory or in a temporary file.
    +Furthermore, the CGI API has been revised. There is now an opaque data type +"argument" that hides all implementation details and that is extensible (if +necessary, it is possible to add features without breaking the interface +again).
    +The CGI argument parser can be configured; currently it is possible to +limit the size of uploaded data, to control by which method arguments are +processed, and to set up where temporary files are created.
    +The other parts of the package that have nothing to do with CGI remain +unchanged. +

    +
  • + +
  • Changed in 0.5.1: A mistake in the documentation has +been corrected.

    +
  • + +
  • Initial version 0.5: +The Netstring package wants to be the successor of the Base64-0.2 and +the Cgi-0.3 packages. The sum of both numbers is 0.5, and because of this, +the first version number is 0.5. +

    +
  • +
+
+ +
+ diff --git a/helm/DEVEL/pxp/netstring/mimestring.ml b/helm/DEVEL/pxp/netstring/mimestring.ml new file mode 100644 index 000000000..8fc4bfcbe --- /dev/null +++ b/helm/DEVEL/pxp/netstring/mimestring.ml @@ -0,0 +1,1035 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +module Str = Netstring_str;; + +let cr_or_lf_re = Str.regexp "[\013\n]";; + +let trim_right_spaces_re = + Str.regexp "[ \t]+$";; + +let trim_left_spaces_re = + Str.regexp "^[ \t]+";; + +let header_re = + Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";; + +let empty_line_re = + Str.regexp "\013?\n";; + +let end_of_header_re = + Str.regexp "\n\013?\n";; + + +let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 = + let rec parse_header i l = + match Str.string_partial_match header_re parstr i with + Some r -> + let i' = Str.match_end r in + if i' > i1 then + failwith "Mimestring.scan_header"; + let name = String.lowercase(Str.matched_group r 1 parstr) in + let value_with_crlf = + Str.matched_group r 2 parstr in + let value = + if unfold then begin + let value_with_rspaces = + Str.global_replace cr_or_lf_re "" value_with_crlf in + let value_with_lspaces = + Str.global_replace trim_right_spaces_re "" value_with_rspaces in + Str.global_replace trim_left_spaces_re "" value_with_lspaces + end + else value_with_crlf + in + parse_header i' ( (name,value) :: l) + | None -> + (* The header must end with an empty line *) + begin match Str.string_partial_match empty_line_re parstr i with + Some r' -> + List.rev l, Str.match_end r' + | None -> + failwith "Mimestring.scan_header" + end + in + parse_header i0 [] +;; + +type s_token = + Atom of string + | EncodedWord of (string * string * string) + | QString of string + | Control of char + | Special of char + | DomainLiteral of string + | Comment + | End +;; + +type s_option = + No_backslash_escaping + | Return_comments + | Recognize_encoded_words +;; + +type s_extended_token = + { token : 
s_token; + token_pos : int; + token_line : int; + token_linepos : int; (* Position of the beginning of the line *) + token_len : int; + mutable token_sep : bool; (* separates adjacent encoded words *) + } +;; + +let get_token et = et.token;; +let get_pos et = et.token_pos;; +let get_line et = et.token_line;; +let get_column et = et.token_pos - et.token_linepos;; +let get_length et = et.token_len;; +let separates_adjacent_encoded_words et = et.token_sep;; + +let get_decoded_word et = + match et.token with + Atom s -> s + | QString s -> s + | Control c -> String.make 1 c + | Special c -> String.make 1 c + | DomainLiteral s -> s + | Comment -> "" + | EncodedWord (_, encoding, content) -> + ( match encoding with + ("Q"|"q") -> + Netencoding.Q.decode content + | ("B"|"b") -> + Netencoding.Base64.decode + ~url_variant:false + ~accept_spaces:false + content + | _ -> failwith "get_decoded_word" + ) + | End -> + failwith "get_decoded_word" +;; + +let get_charset et = + match et.token with + EncodedWord (charset, _, _) -> charset + | End -> failwith "get_charset" + | _ -> "US-ASCII" +;; + +type scanner_spec = + { (* What the user specifies: *) + scanner_specials : char list; + scanner_options : s_option list; + (* Derived from that: *) + mutable opt_no_backslash_escaping : bool; + mutable opt_return_comments : bool; + mutable opt_recognize_encoded_words : bool; + + mutable is_special : bool array; + mutable space_is_special : bool; + } +;; + +type scanner_target = + { scanned_string : string; + mutable scanner_pos : int; + mutable scanner_line : int; + mutable scanner_linepos : int; + (* Position of the beginning of the line *) + mutable scanned_tokens : s_extended_token Queue.t; + (* A queue of already scanned tokens in order to look ahead *) + mutable last_token : s_token; + (* The last returned token. It is only important whether it is + * EncodedWord or not. 
+ *) + } +;; + +type mime_scanner = scanner_spec * scanner_target +;; + +let get_pos_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_pos_of_scanner" + else + target.scanner_pos +;; + +let get_line_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_line_of_scanner" + else + target.scanner_line +;; + +let get_column_of_scanner (spec, target) = + if spec.opt_recognize_encoded_words then + failwith "get_column_of_scanner" + else + target.scanner_pos - target.scanner_linepos +;; + +let create_mime_scanner ~specials ~scan_options = + let is_spcl = Array.create 256 false in + List.iter + (fun c -> is_spcl.( Char.code c ) <- true) + specials; + let spec = + { scanner_specials = specials; + scanner_options = scan_options; + opt_no_backslash_escaping = + List.mem No_backslash_escaping scan_options; + opt_return_comments = + List.mem Return_comments scan_options; + opt_recognize_encoded_words = + List.mem Recognize_encoded_words scan_options; + is_special = is_spcl; + space_is_special = is_spcl.(32); + } + in + (* Grab the remaining arguments: *) + fun ?(pos=0) ?(line=1) ?(column=0) s -> + let target = + { scanned_string = s; + scanner_pos = pos; + scanner_line = line; + scanner_linepos = pos - column; + scanned_tokens = Queue.create(); + last_token = Comment; (* Must not be initialized with EncodedWord *) + } + in + spec, target +;; + + +let encoded_word_re = + Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";; + +let scan_next_token ((spec,target) as scn) = + let mk_pair t len = + { token = t; + token_pos = target.scanner_pos; + token_line = target.scanner_line; + token_linepos = target.scanner_linepos; + token_len = len; + token_sep = false; + }, + t + in + + (* Note: mk_pair creates a new token pair, and it assumes that + * target.scanner_pos (and also scanner_line and scanner_linepos) + * still contain the position of the beginning of the token. 
+ *) + + let s = target.scanned_string in + let l = String.length s in + let rec scan i = + if i < l then begin + let c = s.[i] in + if spec.is_special.( Char.code c ) then begin + let pair = mk_pair (Special c) 1 in + target.scanner_pos <- target.scanner_pos + 1; + (match c with + '\n' -> + target.scanner_line <- target.scanner_line + 1; + target.scanner_linepos <- target.scanner_pos; + | _ -> () + ); + pair + end + else + match c with + '"' -> + (* Quoted string: *) + scan_qstring (i+1) (i+1) 0 + | '(' -> + (* Comment: *) + let i', line, linepos = + scan_comment (i+1) 0 target.scanner_line target.scanner_linepos + in + let advance() = + target.scanner_pos <- i'; + target.scanner_line <- line; + target.scanner_linepos <- linepos + in + if spec.opt_return_comments then begin + let pair = mk_pair Comment (i' - i) in + advance(); + pair + end + else + if spec.space_is_special then begin + let pair = mk_pair (Special ' ') (i' - i) in + advance(); + pair + end + else begin + advance(); + scan i' + end + | (' '|'\t'|'\r') -> + (* Ignore whitespace by default: *) + target.scanner_pos <- target.scanner_pos + 1; + scan (i+1) + | '\n' -> + (* Ignore whitespace by default: *) + target.scanner_pos <- target.scanner_pos + 1; + target.scanner_line <- target.scanner_line + 1; + target.scanner_linepos <- target.scanner_pos; + scan (i+1) + | ('\000'..'\031'|'\127'..'\255') -> + let pair = mk_pair (Control c) 1 in + target.scanner_pos <- target.scanner_pos + 1; + pair + | '[' -> + (* Domain literal: *) + scan_dliteral (i+1) (i+1) 0 + | _ -> + scan_atom i i + end + else + mk_pair End 0 + + and scan_atom i0 i = + let return_atom() = + let astring = String.sub s i0 (i-i0) in + let r = + if spec.opt_recognize_encoded_words then + Str.string_match ~groups:4 encoded_word_re astring 0 + else + None + in + match r with + None -> + (* An atom contains never a linefeed character, so we can ignore + * scanner_line here. 
+ *) + let pair = mk_pair (Atom astring) (i-i0) in + target.scanner_pos <- i; + pair + | Some mr -> + (* Found an encoded word. *) + let charset = Str.matched_group mr 1 astring in + let encoding = Str.matched_group mr 2 astring in + let content = Str.matched_group mr 3 astring in + let t = EncodedWord(String.uppercase charset, + String.uppercase encoding, + content) in + let pair = mk_pair t (i-i0) in + target.scanner_pos <- i; + pair + in + + if i < l then + let c = s.[i] in + match c with + ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') -> + return_atom() + | _ -> + if spec.is_special.( Char.code c ) then + return_atom() + else + scan_atom i0 (i+1) + else + return_atom() + + and scan_qstring i0 i n = + if i < l then + let c = s.[i] in + match c with + '"' -> + (* Regular end of the quoted string: *) + let content, line, linepos = copy_qstring i0 (i-1) n in + let pair = mk_pair (QString content) (i-i0+2) in + target.scanner_pos <- i+1; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + | '\\' when not spec.opt_no_backslash_escaping -> + scan_qstring i0 (i+2) (n+1) + | _ -> + scan_qstring i0 (i+1) (n+1) + else + (* Missing right double quote *) + let content, line, linepos = copy_qstring i0 (l-1) n in + let pair = mk_pair (QString content) (l-i0+1) in + target.scanner_pos <- l; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + + and copy_qstring i0 i1 n = + (* Used for quoted strings and for domain literals *) + let r = String.create n in + let k = ref 0 in + let line = ref target.scanner_line in + let linepos = ref target.scanner_linepos in + for i = i0 to i1 do + let c = s.[i] in + match c with + '\\' when i < i1 && not spec.opt_no_backslash_escaping -> () + | '\n' -> + line := !line + 1; + linepos := i+1; + r.[ !k ] <- c; + incr k + | _ -> + r.[ !k ] <- c; + incr k + done; + assert (!k = n); + r, !line, !linepos + + and scan_dliteral i0 i n = + if i < l then + let c = s.[i] in + match c with + 
']' -> + (* Regular end of the domain literal: *) + let content, line, linepos = copy_qstring i0 (i-1) n in + let pair = mk_pair (DomainLiteral content) (i-i0+2) in + target.scanner_pos <- i+1; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + | '\\' when not spec.opt_no_backslash_escaping -> + scan_dliteral i0 (i+2) (n+1) + | _ -> + (* Note: '[' is not allowed by RFC 822; we treat it here as + * a regular character (questionable) + *) + scan_dliteral i0 (i+1) (n+1) + else + (* Missing right bracket *) + let content, line, linepos = copy_qstring i0 (l-1) n in + let pair = mk_pair (DomainLiteral content) (l-i0+1) in + target.scanner_pos <- l; + target.scanner_line <- line; + target.scanner_linepos <- linepos; + pair + + + and scan_comment i level line linepos = + if i < l then + let c = s.[i] in + match c with + ')' -> + (i+1), line, linepos + | '(' -> + (* nested comment *) + let i', line', linepos' = + scan_comment (i+1) (level+1) line linepos + in + scan_comment i' level line' linepos' + | '\\' when not spec.opt_no_backslash_escaping -> + if (i+1) < l && s.[i+1] = '\n' then + scan_comment (i+2) level (line+1) (i+2) + else + scan_comment (i+2) level line linepos + | '\n' -> + scan_comment (i+1) level (line+1) (i+1) + | _ -> + scan_comment (i+1) level line linepos + else + (* Missing closing ')' *) + i, line, linepos + in + + scan target.scanner_pos +;; + + +let scan_token ((spec,target) as scn) = + (* This function handles token queueing in order to recognize white space + * that separates adjacent encoded words. 
+ *) + + let rec collect_whitespace () = + (* Scans whitespace tokens and returns them as: + * (ws_list, other_tok) if there is some other_tok following the + * list (other_tok = End is possible) + *) + let (et, t) as pair = scan_next_token scn in + ( match t with + (Special ' '|Special '\t'|Special '\n'|Special '\r') -> + let ws_list, tok = collect_whitespace() in + pair :: ws_list, tok + | _ -> + [], pair + ) + in + + try + (* Is there an already scanned token in the queue? *) + let et = Queue.take target.scanned_tokens in + let t = et.token in + target.last_token <- t; + et, et.token + with + Queue.Empty -> + (* If not: inspect the last token. If that token is an EncodedWord, + * the next tokens are scanned in advance to determine if there + * are spaces separating two EncodedWords. These tokens are put + * into the queue such that it is avoided that they are scanned + * twice. (The sole purpose of the queue.) + *) + match target.last_token with + EncodedWord(_,_,_) as ew -> + let ws_list, tok = collect_whitespace() in + (* If tok is an EncodedWord, too, the tokens in ws_list must + * be flagged as separating two adjacent encoded words. 
+ *) + ( match tok with + _, EncodedWord(_,_,_) -> + List.iter + (fun (et,t) -> + et.token_sep <- true) + ws_list + | _ -> + () + ); + (* Anyway, queue the read tokens but the first up *) + ( match ws_list with + [] -> + (* Nothing to queue *) + let et, t = tok in + target.last_token <- t; + tok + | (et,t) as pair :: ws_list' -> + List.iter + (fun (et',_) -> + Queue.add et' target.scanned_tokens) + ws_list'; + ( match tok with + | _, End -> + () + | (et',_) -> + Queue.add et' target.scanned_tokens + ); + (* Return the first scanned token *) + target.last_token <- t; + pair + ) + | _ -> + (* Regular case: Scan one token; do not queue it up *) + let (et, t) as pair = scan_next_token scn in + target.last_token <- t; + pair +;; + + +let scan_token_list scn = + let rec collect() = + match scan_token scn with + _, End -> + [] + | pair -> + pair :: collect() + in + collect() +;; + + +let scan_structured_value s specials options = + let rec collect scn = + match scan_token scn with + _, End -> + [] + | _, t -> + t :: collect scn + in + let scn = create_mime_scanner specials options s in + collect scn +;; + + +let specials_rfc822 = + [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' 
];; + + +let specials_rfc2045 = + [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];; + + +let scan_encoded_text_value s = + let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in + let options = [ Recognize_encoded_words ] in + let scn = create_mime_scanner specials options s in + + let rec collect () = + match scan_token scn with + _, End -> + [] + | et, _ when separates_adjacent_encoded_words et -> + collect() + | et, (Special _|Atom _|EncodedWord(_,_,_)) -> + et :: collect () + | _, _ -> + assert false + in + collect() +;; + + +let scan_value_with_parameters s options = + let rec parse_params tl = + match tl with + Atom n :: Special '=' :: Atom v :: tl' -> + (n,v) :: parse_rest tl' + | Atom n :: Special '=' :: QString v :: tl' -> + (n,v) :: parse_rest tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" + and parse_rest tl = + match tl with + [] -> [] + | Special ';' :: tl' -> + parse_params tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" + in + + (* Note: Even if not used here, the comma is a very common separator + * and should be recognized as being special. You will get a + * failure if there is a comma in the scanned string. 
+ *) + let tl = scan_structured_value s [ ';'; '='; ',' ] options in + match tl with + [ Atom n ] -> n, [] + | [ QString n ] -> n, [] + | Atom n :: Special ';' :: tl' -> + n, parse_params tl' + | QString n :: Special ';' :: tl' -> + n, parse_params tl' + | _ -> + failwith "Mimestring.scan_value_with_parameters" +;; + + +let scan_mime_type s options = + let n, params = scan_value_with_parameters s options in + (String.lowercase n), + (List.map (fun (n,v) -> (String.lowercase n, v)) params) +;; + + +let lf_re = Str.regexp "[\n]";; + +let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary = + let l_s = String.length s in + if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then + invalid_arg "Mimestring.scan_multipart_body"; + + (* First compile the regexps scanning for 'boundary': *) + let boundary1_re = + Str.regexp ("\n--" ^ Str.quote boundary) in + let boundary2_re = + Str.regexp ("--" ^ Str.quote boundary) in + + let rec parse i = + (* i: Beginning of the current part (position directly after the + * boundary line + *) + (* Search for next boundary at position i *) + let i' = + try min (fst (Str.search_forward boundary1_re s i) + 1) i1 + with + Not_found -> i1 + in + (* i': Either the position of the first '-' of the boundary line, + * or i1 if no boundary has been found + *) + if i' >= i1 then + [] (* Ignore everything after the last boundary *) + else + let i'' = + try min (fst (Str.search_forward lf_re s i') + 1) i1 + with + Not_found -> i1 + in + (* i'': The position after the boundary line *) +(* + print_int i; print_newline(); + print_int i'; print_newline(); + print_int i''; print_newline(); + flush stdout; +*) + let header, k = scan_header s i i' in + (* header: the header of the part + * k: beginning of the body + *) + + let value = + (* We know that i'-1 is a linefeed character. i'-2 should be a CR + * character. Both characters are not part of the value. 
+ *) + if i' >= 2 then + match s.[i'-2] with + '\013' -> String.sub s k (i'-2-k) + | _ -> String.sub s k (i'-1-k) + else + String.sub s k (i'-1-k) + in + + let pair = + (header, value) in + + if i'' >= i1 + then + [ pair ] + else + pair :: parse i'' + in + + (* Find the first boundary. This is a special case, because it may be + * right at the beginning of the string (no preceding CRLF) + *) + + let i_bnd = + if Str.string_partial_match boundary2_re s i0 <> None then + i0 + else + try min (fst (Str.search_forward boundary1_re s i0)) i1 + with + Not_found -> i1 + in + + if i_bnd >= i1 then + [] + else + let i_bnd' = + try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1 + with + Not_found -> i1 + in + if i_bnd' >= i1 then + [] + else + parse i_bnd' +;; + + +let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary = + let parts = scan_multipart_body s i0 i1 boundary in + List.map + (fun (params, value) -> + let encoding = + try List.assoc "content-transfer-encoding" params + with Not_found -> "7bit" + in + + (* NOTE: In the case of "base64" and "quoted-printable", the allocation + * of the string "value" could be avoided. + *) + + let value' = + match encoding with + ("7bit"|"8bit"|"binary") -> value + | "base64" -> + Netencoding.Base64.decode_substring + value 0 (String.length value) false true + | "quoted-printable" -> + Netencoding.QuotedPrintable.decode_substring + value 0 (String.length value) + | _ -> + failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding" + in + (params, value') + ) + parts +;; + + +let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop = + + (* The block size of s must be at least the length of the boundary + 3. + * Otherwise it is not guaranteed that the boundary is always recognized. 
+ *) + if Netstream.block_size s < String.length boundary + 3 then + invalid_arg "Mimestring.scan_multipart_body_from_netstream"; + + (* First compile the regexps scanning for 'boundary': *) + let boundary1_re = + Str.regexp ("\n--" ^ Str.quote boundary) in + let boundary2_re = + Str.regexp ("--" ^ Str.quote boundary) in + + (* Subtask 1: Search the end of the MIME header: CR LF CR LF + * (or LF LF). Enlarge the window until the complete header + * is covered by the window. + *) + let rec search_end_of_header k = + (* Search the end of the header beginning at position k of the + * current window. + * Return the position of the first character of the body. + *) + try + (* Search for LF CR? LF: *) + let i, r = Str.search_forward + end_of_header_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then + Str.match_end r + else + raise Not_found + with + Not_found -> + (* If the end of the stream is reached, the end of the header + * is missing: Error. + * Otherwise, we try to read another block, and continue. + *) + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream"; + let w0 = Netstream.window_length s in + Netstream.want_another_block s; + search_end_of_header (max (w0 - 2) 0) + in + + (* Subtask 2: Search the first boundary line. *) + let rec search_first_boundary() = + (* Search boundary per regexp; return the position of the character + * immediately following the boundary (on the same line), or + * raise Not_found. + *) + try + (* Search boundary per regexp: *) + let i, r = Str.search_forward + boundary1_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + 0 + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. 
+ *) + if Str.match_end r <= Netstream.window_length s then begin + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then raise Not_found; + (* The regexp did not match: Move the window by one block. + *) + let n = + min + (Netstream.window_length s) + (Netstream.block_size s) + in + Netstream.move s n; + search_first_boundary() + in + + (* Subtask 3: Search the next boundary line. Invoke 'add' for every + * read chunk + *) + let rec search_next_boundary p = + (* Returns the position directly after the boundary on the same line *) + try + (* Search boundary per regexp: *) + let i,r = Str.search_forward + boundary1_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + 0 + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then begin + (* Add the last chunk of the part. *) + let n = + (* i is a LF. i - 1 should be CR. Ignore these characters. *) + if i >= 1 then + match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with + '\013' -> i - 1 + | _ -> i + else + i + in + (* Printf.printf "add n=%d\n" n; *) + add p s 0 n; + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found"; + (* The regexp did not match: Add the first block of the window; + * and move the window. + *) + let n = + min + (Netstream.window_length s) + (Netstream.block_size s) + in + (* Printf.printf "add n=%d\n" n; *) + add p s 0 n; + Netstream.move s n; + search_next_boundary p + in + + (* Subtask 4: Search the end of the boundary line *) + let rec search_end_of_line k = + (* Search LF beginning at position k. Discard any contents until that. 
*) + try + let i,r = Str.search_forward + lf_re + (Netbuffer.unsafe_buffer (Netstream.window s)) + k + in + (* If match_end <= window_length, the search was successful. + * Otherwise, we searched in the uninitialized region of the + * buffer. + *) + if Str.match_end r <= Netstream.window_length s then begin + Str.match_end r + end + else raise Not_found + with + Not_found -> + if Netstream.at_eos s then + failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end"; + (* The regexp did not match: move the window. + *) + let n = Netstream.window_length s in + Netstream.move s n; + search_end_of_line 0 + in + + (* Subtask 5: Check whether "--" follows the boundary on the same line *) + let check_whether_last_boundary k = + (* k: The position directly after the boundary. *) + Netstream.want s (k+2); + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-' + in + + (* Subtask 6: Check whether the buffer begins with a boundary. *) + let check_beginning_is_boundary () = + let m = String.length boundary + 2 in + Netstream.want s m; + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + (Netstream.window_length s >= m) && + (Str.string_partial_match boundary2_re str 0 <> None) + in + + let rec parse_part () = + (* The first byte of the current window of s contains the character + * directly following the boundary line that starts this part. 
+ *) + (* Search the end of the MIME header: *) + let k_eoh = search_end_of_header 0 in + (* Printf.printf "k_eoh=%d\n" k_eoh; *) + (* Get the MIME header: *) + let str = Netbuffer.unsafe_buffer (Netstream.window s) in + let header, k_eoh' = scan_header str 0 k_eoh in + assert (k_eoh = k_eoh'); + (* Move the window over the header: *) + Netstream.move s k_eoh; + (* Create the part: *) + let p = create header in + let continue = + begin try + (* Search the next boundary; add the chunks while searching: *) + let k_eob = search_next_boundary p in + (* Printf.printf "k_eob=%d\n" k_eob; *) + (* Is this the last boundary? *) + if check_whether_last_boundary k_eob then begin + (* Skip the rest: *) + while not (Netstream.at_eos s) do + Netstream.move s (Netstream.window_length s) + done; + Netstream.move s (Netstream.window_length s); + false + end + else begin + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line k_eob in + Netstream.move s k_eol; + true + end + with + any -> + (try stop p with _ -> ()); + raise any + end in + stop p; + if continue then + (* Continue with next part: *) + parse_part() + in + + (* Check whether s directly begins with a boundary: *) + if check_beginning_is_boundary() then begin + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line 0 in + Netstream.move s k_eol; + (* Begin with first part: *) + parse_part() + end + else begin + (* Search the first boundary: *) + try + let k_eob = search_first_boundary() in + (* Printf.printf "k_eob=%d\n" k_eob; *) + (* Move to the beginning of the next line: *) + let k_eol = search_end_of_line k_eob in + (* Printf.printf "k_eol=%d\n" k_eol; *) + Netstream.move s k_eol; + (* Begin with first part: *) + parse_part() + with + Not_found -> + (* No boundary at all: The body is empty. 
*) + () + end; +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/08/13 00:04:36 gerd + * Encoded_word -> EncodedWord + * Bugfixes. + * + * Revision 1.7 2000/08/07 00:25:14 gerd + * Implemented the new functions for structured field lexing. + * + * Revision 1.6 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.5 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.4 2000/05/16 22:30:14 gerd + * Added support for some types of malformed MIME messages. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/mimestring.mli b/helm/DEVEL/pxp/netstring/mimestring.mli new file mode 100644 index 000000000..39634b59c --- /dev/null +++ b/helm/DEVEL/pxp/netstring/mimestring.mli @@ -0,0 +1,683 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(**********************************************************************) +(* Collection of auxiliary functions to parse MIME headers *) +(**********************************************************************) + + +val scan_header : + ?unfold:bool -> + string -> start_pos:int -> end_pos:int -> + ((string * string) list * int) + (* let params, i2 = scan_header s i0 i1: + * + * DESCRIPTION + * + * Scans the MIME header that begins at position i0 in the string s + * and that must end somewhere before position i1. It is intended + * that in i1 the character position following the end of the body of the + * MIME message is passed. + * Returns the parameters of the header as (name,value) pairs (in + * params), and in i2 the position of the character following + * directly after the header (i.e. 
after the blank line separating + * the header from the body). + * The following normalizations have already been applied: + * - The names are all in lowercase + * - Newline characters (CR and LF) have been removed (unless + * ?unfold:false has been passed) + * - Whitespace at the beginning and at the end of values has been + * removed (unless ?unfold:false is specified) + * The rules of RFC 2047 have NOT been applied. + * The function fails if the header violates the header format + * strongly. (Some minor deviations are tolerated, e.g. it is sufficient + * to separate lines by only LF instead of CRLF.) + * + * OPTIONS: + * + * unfold: If true (the default), folded lines are concatenated and + * returned as one line. This means that CR and LF characters are + * deleted and that whitespace at the beginning and the end of the + * string is removed. + * You may set ?unfold:false to locate individual characters in the + * parameter value exactly. + * + * ABOUT MIME MESSAGE FORMAT: + * + * This is the modern name for messages in "E-Mail format". Messages + * consist of a header and a body; the first empty line separates both + * parts. The header contains lines "param-name: param-value" where + * the param-name must begin on column 0 of the line, and the ":" + * separates the name and the value. So the format is roughly: + * + * param1-name: param1-value + * ... + * paramN-name: paramN-value + * + * body + * + * This function wants in i0 the position of the first character of + * param1-name in the string, and in i1 the position of the character + * following the body. It returns as i2 the position where the body + * begins. Furthermore, in 'params' all parameters are returned that + * exist in the header. + * + * DETAILS + * + * Note that parameter values are restricted; you cannot represent + * arbitrary strings. 
The following problems can arise: + * - Values cannot begin with whitespace characters, because there + * may be an arbitrary number of whitespaces between the ':' and the + * value. + * - Values (and names of parameters, too) must only be formed of + * 7 bit ASCII characters. (If this is not enough, the MIME standard + * knows the extension RFC 2047 that allows that header values may + * be composed of arbitrary characters of arbitrary character sets.) + * - Header values may be broken into several lines, the continuation + * lines must begin with whitespace characters. This means that values + * must not contain line breaks as semantical part of the value. + * And it may mean that ONE whitespace character is not distinguishable + * from SEVERAL whitespace characters. + * - Header lines must not be longer than 76 characters. Values that + * would result into longer lines must be broken into several lines. + * This means that you cannot represent strings that contain too few + * whitespace characters. + * - Some gateways pad the lines with spaces at the end of the lines. + * + * This implementation of a MIME scanner tolerates a number of + * deviations from the standard: long lines are not rejected; 8 bit + * values are accepted; lines may be ended only with LF instead of + * CRLF. + * Furthermore, header values are transformed: + * - leading and trailing spaces are always removed + * - CRs and LFs are deleted; it is guaranteed that there is at least + * one space or tab where CR/LFs are deleted. + * Last but not least, the names of the header values are converted + * to lowercase; MIME specifies that they are case-independent. + * + * COMPATIBILITY WITH THE STANDARD + * + * This function can parse all MIME headers that conform to RFC 822. + * But there may be still problems, as RFC 822 allows some crazy + * representations that are actually not used in practice. 
+ * In particular, RFC 822 allows backslashes to be used to "indicate" + * that a CRLF sequence is semantically meant as a line break. As this + * function normally deletes CRLFs, it is not possible to recognize such + * indicators in the result of the function. + *) + +(**********************************************************************) + +(* The following types and functions make it possible to build scanners for + * structured MIME values in a highly configurable way. + * + * WHAT ARE STRUCTURED VALUES? + * + * RFC 822 (together with some other RFCs) defines lexical rules + * for how formal MIME header values should be divided up into tokens. Formal + * MIME headers are those headers that are formed according to some + * grammar, e.g. mail addresses or MIME types. + * Some of the characters separate phrases of the value; these are + * the "special" characters. For example, '@' is normally a special + * character for mail addresses, because it separates the user name + * from the domain name. RFC 822 defines a fixed set of special + * characters, but other RFCs use different sets. Because of this, + * the following functions allow the set of special characters to be configured. + * Every sequence of characters may be enclosed in double quotes, + * which means that the sequence is meant as a literal data item; + * special characters are not recognized inside a quoted string. You may + * use the backslash to insert any character (including double quotes) + * verbatim into the quoted string (e.g. "He said: \"Give it to me!\""). + * The sequence of a backslash character and another character is called + * a quoted pair. + * Structured values may contain comments. The beginning of a comment + * is indicated by '(', and the end by ')'. Comments may be nested. + * Comments may contain quoted pairs. A + * comment counts as if a space character were written instead of it. + * Control characters are the ASCII characters 0 to 31, and 127.
+ * RFC 822 demands that MIME headers are 7 bit ASCII strings. Because + * of this, this function also counts the characters 128 to 255 as + * control characters. + * Domain literals are strings embraced by '[' and ']'; such literals + * may contain quoted pairs. Today, domain literals are used to specify + * IP addresses. + * Every character sequence not falling in one of the above categories + * is an atom (a sequence of non-special and non-control characters). + * When recognized, atoms may be encoded in a character set different than + * US-ASCII; such atoms are called encoded words (see RFC 2047). + * + * EXTENDED INTERFACE: + * + * In order to scan a string containing a MIME value, you must first + * create a mime_scanner using the function create_mime_scanner. + * The scanner contains the reference to the scanned string, and a + * specification how the string is to be scanned. The specification + * consists of the lists 'specials' and 'scan_options'. + * + * The character list 'specials' specifies the set of special characters. + * These characters are returned as Special c token; the following additional + * rules apply: + * + * - Spaces: + * If ' ' in specials: A space character is returned as Special ' '. + * Note that there may also be an effect on how comments are returned + * (see below). + * If ' ' not in specials: Spaces are ignored. + * + * - Tabs, CRs, LFs: + * If '\t' in specials: A tab character is returned as Special '\t'. + * If '\t' not in specials: Tabs are ignored. + * + * If '\r' in specials: A CR character is returned as Special '\r'. + * If '\r' not in specials: CRs are ignored. + * + * If '\n' in specials: A LF character is returned as Special '\n'. + * If '\n' not in specials: LFs are ignored. + * + * - Comments: + * If '(' in specials: Comments are not recognized. The character '(' + * is returned as Special '('. + * If '(' not in specials: Comments are recognized. 
How comments are + * returned, depends on the following: + * If Return_comments in scan_options: Outer comments are returned as + * Comment (note that inner comments count but + * are not returned as tokens) + * If otherwise ' ' in specials: Outer comments are returned as + * Special ' ' + * Otherwise: Comments are recognized but ignored. + * + * - Quoted strings: + * If '"' in specials: Quoted strings are not recognized, and double quotes + * are returned as Special '"'. + * If '"' not in specials: Quoted strings are returned as QString tokens. + * + * - Domain literals: + * If '[' in specials: Domain literals are not recognized, and left brackets + * are returned as Special '['. + * If '[' not in specials: Domain literals are returned as DomainLiteral + * tokens. + * + * Note that the rule for domain literals is completely new in netstring-0.9. + * It may cause incompatibilities with previous versions if '[' is not + * special. + * + * The general rule for special characters: Every special character c is + * returned as Special c, and any additional scanning functionality + * for this character is turned off. + * + * If recognized, quoted strings are returned as QString s, where + * s is the string without the embracing quotes, and with already + * decoded quoted pairs. + * + * Control characters c are returned as Control c. + * + * If recognized, comments may either be returned as spaces (in the case + * you are not interested in the contents of comments), or as Comment tokens. + * The contents of comments are not further scanned; you must start a + * subscanner to analyze comments as structured values. + * + * If recognized, domain literals are returned as DomainLiteral s, where + * s is the literal without brackets, and with decoded quoted pairs. + * + * Atoms are returned as Atom s where s is a longest sequence of + * atomic characters (all characters which are neither special nor control + * characters nor delimiters for substructures). 
If the option + * Recognize_encoded_words is on, atoms which look like encoded words + * are returned as EncodedWord tokens. (Important note: Neither '?' nor + * '=' may be special if this functionality is to work.) + * + * After the mime_scanner has been created, you can scan the tokens by + * invoking scan_token which returns one token at a time, or by invoking + * scan_token_list which returns all following tokens. + * + * There are two token types: s_token is the base type and is intended to + * be used for pattern matching. s_extended_token is a wrapper that + * additionally contains information about where the token occurs. + * + * SIMPLE INTERFACE + * + * Instead of creating a mime_scanner and calling the scan functions, + * you may also invoke scan_structured_value. This function returns the + * list of tokens directly; however, it is restricted to s_token. + * + * EXAMPLES + * + * scan_structured_value "user@domain.com" [ '@'; '.' ] [] + * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] + * + * scan_structured_value "user @ domain . com" [ '@'; '.' ] [] + * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] + * + * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] [] + * = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] + * + * scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] + * [ Return_comments ] + * = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.'; + * Atom "com" ] + * + * scan_structured_value "user (Do you know him?) @ domain . com" + * [ '@'; '.'; ' ' ] [] + * = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@'; + * Special ' '; Atom "domain"; + * Special ' '; Special '.'; Special ' '; Atom "com" ] + * + * scan_structured_value "user (Do you know him?) @ domain .
com" + * [ '@'; '.'; ' ' ] [ Return_comments ] + * = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@'; + * Special ' '; Atom "domain"; + * Special ' '; Special '.'; Special ' '; Atom "com" ] + * + * scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] [] + * = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain"; + * Special ' '; Special '.'; Special ' '; Atom "com" ] + * + * scan_structured_value "user(Do you know him?)@domain.com" ['@'; '.'; '('] + * [] + * = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know"; + * Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] + * + * scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] [] + * = [ QString "My.name"; Special '@'; Atom "domain"; Special '.'; + * Atom "com" ] + * + * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" + * [ ] [ ] + * = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ] + * + * scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" + * [ ] [ Recognize_encoded_words ] + * = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ] + * + *) + + + +type s_token = + Atom of string + | EncodedWord of (string * string * string) + | QString of string + | Control of char + | Special of char + | DomainLiteral of string + | Comment + | End + +(* - Words are: Atom, EncodedWord, QString. + * - Atom s: The character sequence forming the atom is contained in s + * - EncodedWord(charset, encoding, encoded_string) means: + * * charset is the (uppercase) character set + * * encoding is either "Q" or "B" + * * encoded_string: contains the text of the word; the text is represented + * as octet string following the conventions for character set charset and + * then encoded either as "Q" or "B" string. 
+ * - QString s: Here, s are the characters inside the double quotes after + * decoding any quoted pairs (backslash + character pairs) + * - Control c: The control character c + * - Special c: The special character c + * - DomainLiteral s: s contains the characters inside the brackets after + * decoding any quoted pairs + * - Comment: if the option Return_comments is specified, this token + * represents the whole comment. + * - End: Is returned after the last token + *) + + +type s_option = + No_backslash_escaping + (* Do not handle backslashes in quoted string and comments as escape + * characters; backslashes are handled as normal characters. + * For example: "C:\dir\file" will be returned as + * QString "C:\dir\file", and not as QString "C:dirfile". + * - This is a common error in many MIME implementations. + *) + | Return_comments + (* Comments are returned as token Comment (unless '(' is included + * in the list of special characters, in which case comments are + * not recognized at all). + * You may get the exact location of the comment by applying + * get_pos and get_length to the extended token. + *) + | Recognize_encoded_words + (* Enables that encoded words are recognized and returned as + * EncodedWord(charset,encoding,content) instead of Atom. + *) + +type s_extended_token + (* An opaque type containing s_token plus: + * - where the token occurs + * - RFC-2047 access functions + *) + +val get_token : s_extended_token -> s_token + (* Return the s_token within the s_extended_token *) + +val get_decoded_word : s_extended_token -> string +val get_charset : s_extended_token -> string + (* Return the decoded word (the contents of the word after decoding the + * "Q" or "B" representation), and the character set of the decoded word + * (uppercase). 
+ * These functions not only work for EncodedWord: + * - Atom: Returns the atom without decoding it + * - QString: Returns the characters inside the double quotes, and + * decodes any quoted pairs (backslash + character) + * - Control: Returns the one-character string + * - Special: Returns the one-character string + * - DomainLiteral: Returns the characters inside the brackets, and + * decodes any quoted pairs + * - Comment: Returns "" + * The character set is "US-ASCII" for these tokens. + *) + +val get_pos : s_extended_token -> int + (* Return the byte position where the token starts in the string + * (the first byte has position 0) + *) + +val get_line : s_extended_token -> int + (* Return the line number where the token starts (numbering usually + * begins with 1) + *) + +val get_column : s_extended_token -> int + (* Return the column of the line where the token starts (first column + * is number 0) + *) + +val get_length : s_extended_token -> int + (* Return the length of the token in bytes *) + +val separates_adjacent_encoded_words : s_extended_token -> bool + (* True iff the current token is white space (Special ' ', Special '\t', + * Special '\r' or Special '\n') and the last non-white space token + * was EncodedWord and the next non-white space token will be + * EncodedWord. + * Such spaces do not count and must be ignored by any application. + *) + + +type mime_scanner + +val create_mime_scanner : + specials:char list -> + scan_options:s_option list -> + ?pos:int -> + ?line:int -> + ?column:int -> + string -> + mime_scanner + (* Creates a new mime_scanner scanning the passed string. + * specials: The list of characters recognized as special characters. + * scan_options: The list of global options modifying the behaviour + * of the scanner + * pos: The position of the byte where the scanner starts in the + * passed string. Defaults to 0. + * line: The line number of this byte. Defaults to 1. + * column: The column number of this byte. Defaults to 0.
+ * + * The optional parameters pos, line, column are intentionally after + * scan_options and before the string argument, so you can specify + * scanners by partially applying arguments to create_mime_scanner + * which are not yet connected with a particular string: + * let my_scanner_spec = create_mime_scanner my_specials my_options in + * ... + * let my_scanner = my_scanner_spec my_string in + * ... + *) + +val get_pos_of_scanner : mime_scanner -> int +val get_line_of_scanner : mime_scanner -> int +val get_column_of_scanner : mime_scanner -> int + (* Return the current position, line, and column of a mime_scanner. + * The primary purpose of these functions is to simplify switching + * from one mime_scanner to another within a string: + * + * let scanner1 = create_mime_scanner ... s in + * ... now scanning some tokens from s using scanner1 ... + * let scanner2 = create_mime_scanner ... + * ?pos:(get_pos_of_scanner scanner1) + * ?line:(get_line_of_scanner scanner1) + * ?column:(get_column_of_scanner scanner1) + * s in + * ... scanning more tokens from s using scanner2 ... + * + * RESTRICTION: These functions are not available if the option + * Recognize_encoded_words is on. The reason is that this option + * enables look-ahead scanning; please use the location of the last + * scanned token instead. + * It is currently not clear whether a better implementation is needed + * (costs a bit more time). + * + * Note: To improve the performance of switching, it is recommended to + * create scanner specs in advance (see the example my_scanner_spec + * above). + *) + +val scan_token : mime_scanner -> (s_extended_token * s_token) + (* Returns the next token, or End if there is no more token.
*) + +val scan_token_list : mime_scanner -> (s_extended_token * s_token) list + (* Returns all following tokens as a list (excluding End) *) + +val scan_structured_value : string -> char list -> s_option list -> s_token list + (* This function is included for backwards compatibility, and for all + * cases not requiring extended tokens. + * + * It scans the passed string according to the list of special characters + * and the list of options, and returns the list of all tokens. + *) + +val specials_rfc822 : char list +val specials_rfc2045 : char list + (* The sets of special characters defined by the RFCs 822 and 2045. + * + * CHANGE in netstring-0.9: '[' and ']' are no longer special because + * there is now support for domain literals. + * '?' and '=' are not special in the rfc2045 version because there is + * already support for encoded words. + *) + + +(**********************************************************************) + +(* Widely used scanners: *) + + +val scan_encoded_text_value : string -> s_extended_token list + (* Scans a "text" value. The returned token list contains only + * Special, Atom and EncodedWord tokens. + * Spaces, TABs, CRs, LFs are returned unless + * they occur between adjacent encoded words in which case + * they are ignored. + *) + + +val scan_value_with_parameters : string -> s_option list -> + (string * (string * string) list) + (* let name, params = scan_value_with_parameters s options: + * Scans phrases like + * name ; p1=v1 ; p2=v2 ; ... + * The scan is done with the set of special characters [';', '=']. + *) + +val scan_mime_type : string -> s_option list -> + (string * (string * string) list) + (* let name, params = scan_mime_type s options: + * Scans MIME types like + * text/plain; charset=iso-8859-1 + * The name of the type and the names of the parameters are converted + * to lower case. 
+ *) + + +(**********************************************************************) + +(* Scanners for MIME bodies *) + +val scan_multipart_body : string -> start_pos:int -> end_pos:int -> + boundary:string -> + ((string * string) list * string) list + (* let [params1, value1; params2, value2; ...] + * = scan_multipart_body s i0 i1 b + * + * Scans the string s that is the body of a multipart message. + * The multipart message begins at position i0 in s, and i1 is the position + * of the character following the message. In b the boundary string + * must be passed (this is the "boundary" parameter of the multipart + * MIME type, e.g. multipart/mixed;boundary="some string" ). + * The return value is the list of the parts, where each part + * is returned as a pair (params, value). The left component params + * is the list of name/value pairs of the header of the part. The + * right component is the RAW content of the part, i.e. if the part + * is encoded ("content-transfer-encoding"), the content is returned + * in the encoded representation. The caller is responsible for decoding + * the content. + * The material before the first boundary and after the last + * boundary is not returned. + * + * MULTIPART MESSAGES + * + * The MIME standard defines a way to group several message parts into + * a larger message (for E-Mails this technique is known as "attaching" + * files to messages); these are the so-called multipart messages. + * Such messages are recognized by the major type string "multipart", + * e.g. multipart/mixed or multipart/form-data. Multipart types MUST + * have a boundary parameter because boundaries are essential for the + * representation. + * Multipart messages have a format like + * + * ...Header... + * Content-type: multipart/xyz; boundary="abc" + * ...Header... + * + * Body begins here ("prologue") + * --abc + * ...Header part 1... + * + * ...Body part 1... + * --abc + * ...Header part 2... + * + * + * ...Body part 2 + * --abc + * ...
+ * --abc-- + * Epilogue + * + * The parts are separated by boundary lines which begin with "--" and + * the string passed as boundary parameter. (Note that there may follow + * arbitrary text on boundary lines after "--abc".) The boundary is + * chosen such that it does not occur as a prefix of any line of the + * inner parts of the message. + * The parts are again MIME messages, with header and body. Note + * that it is explicitly allowed that the parts are even multipart + * messages. + * The texts before the first boundary and after the last boundary + * are ignored. + * Note that multipart messages as a whole MUST NOT be encoded. + * Only the PARTS of the messages may be encoded (if they are not + * multipart messages themselves). + * + * Please read RFC 2046 if you want to know the gory details of this + * brain-dead format. + *) + +val scan_multipart_body_and_decode : string -> start_pos:int -> end_pos:int -> + boundary:string -> + ((string * string) list * string) list + (* Same as scan_multipart_body, but decodes the bodies of the parts + * if they are encoded using the methods "base64" or "quoted printable". + * Fails if an unknown encoding is used. + *) + +val scan_multipart_body_from_netstream + : Netstream.t -> + boundary:string -> + create:((string * string) list -> 'a) -> + add:('a -> Netstream.t -> int -> int -> unit) -> + stop:('a -> unit) -> + unit + (* scan_multipart_body_from_netstream s b create add stop: + * + * Reads the MIME message from the netstream s block by block. The + * parts are delimited by the boundary b. + * + * Once a new part is detected and begins, the function 'create' is + * called with the MIME header as argument. The result p of this function + * may be of any type. + * + * For every chunk of the part that is being read, the function 'add' + * is invoked: add p s k n. + * Here, p is the value returned by the 'create' invocation for the + * current part. s is the netstream.
The current window of s contains + * the read chunk completely; the chunk begins at position k of the + * window (relative to the beginning of the window) and has a length + * of n bytes. + * + * When the part has been fully read, the function 'stop' is + * called with p as argument. + * + * That means, for every part the following is executed: + * - let p = create h + * - add p s k1 n1 + * - add p s k2 n2 + * - ... + * - add p s kN nN + * - stop p + * + * IMPORTANT PRECONDITION: + * - The block size of the netstream s must be at least + * String.length b + 3 + * + * EXCEPTIONS: + * - Exceptions can happen because of ill-formed input, and within + * the callbacks of the functions 'create', 'add', 'stop'. + * - If the exception happens while part p is being read, and the + * 'create' function has already been called (successfully), the + * 'stop' function is also called (you have the chance to close files). + *) + + +(* THREAD-SAFETY: + * The functions are thread-safe as long as the threads do not share + * values. + *) + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.8 2000/08/13 00:04:36 gerd + * Encoded_word -> EncodedWord + * Bugfixes. + * + * Revision 1.7 2000/08/07 00:25:00 gerd + * Major update of the interface for structured field lexing. + * + * Revision 1.6 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.5 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.4 2000/05/16 22:29:12 gerd + * New "option" arguments specifying the level of MIME + * compatibility. + * + * Revision 1.3 2000/04/15 13:09:01 gerd + * Implemented uploads to temporary files. + * + * Revision 1.2 2000/03/02 01:15:30 gerd + * Updated. + * + * Revision 1.1 2000/02/25 15:21:12 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netbuffer.ml b/helm/DEVEL/pxp/netstring/netbuffer.ml new file mode 100644 index 000000000..d6fc40ff7 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netbuffer.ml @@ -0,0 +1,145 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +type t = + { mutable buffer : string; + mutable length : int; + } + +(* To help the garbage collector: + * The 'buffer' has a minimum length of 31 bytes. This minimum can still + * be stored in the minor heap. + * The 'buffer' has a length which is always near a multiple of two. This + * limits the number of different bucket sizes, and simplifies reallocation + * of freed memory. + *) + +(* Optimal string length: + * Every string takes: 1 word for the header, enough words for the + * contents + 1 Null byte (for C compatibility). + * If the buffer grows, it is best to use a new string length such + * that the number of words is exactly twice as large as for the previous + * string. + * n: length of the previous string in bytes + * w: storage size of the previous string in words + * n': length of the new string in bytes + * w' = 2*w: storage size of the new string in words + * + * w = (n+1) / word_length + 1 + * [it is assumed that (n+1) is always a multiple of word_length] + * + * n' = (2*w - 1) * word_length - 1 + * + * n' = [2 * ( [n+1] / word_length + 1) - 1] * word_length - 1 + * = ... 
let delete b ~pos:k ~len:l =
  (* delete b ~pos:k ~len:l: removes the l bytes starting at position k
   * from the logical contents of b, in place. The bytes behind the
   * removed range are moved down by l positions. The underlying string
   * is not reallocated, so the buffer does not shrink.
   *
   * Raises Invalid_argument "Netbuffer.delete" if the range k .. k+l-1
   * does not lie completely within the logical contents (0 .. length-1).
   * Without this check an out-of-range call could leave b.length
   * negative, or inconsistent with the data actually stored.
   *)
  if k < 0 || l < 0 || k + l > b.length then
    raise (Invalid_argument "Netbuffer.delete");
  (* Only the logical tail has to be moved, not the unused capacity
   * behind b.length.
   *)
  let tail = b.length - k - l in
  if tail > 0 then
    String.blit b.buffer (k+l) b.buffer k tail;
  b.length <- b.length - l
0 buffer' 0 b.length; + b.buffer <- buffer' + end + +let clear b = + delete b 0 (b.length) + +let index_from b k c = + if k > b.length then + raise (Invalid_argument "Netbuffer.index_from"); + let p = String.index_from b.buffer k c in + if p >= b.length then raise Not_found; + p + +let print_buffer b = + Format.printf + "" + b.length + (String.length b.buffer) +;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.3 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.2 2000/06/24 20:20:33 gerd + * Added the toploop printer. + * + * Revision 1.1 2000/04/15 13:07:48 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netbuffer.mli b/helm/DEVEL/pxp/netstring/netbuffer.mli new file mode 100644 index 000000000..0ecd61e6a --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netbuffer.mli @@ -0,0 +1,93 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +(* A Netbuffer.t is a buffer that can grow and shrink dynamically. *) + +type t + +val create : int -> t + (* Creates a netbuffer which allocates initially this number of bytes. + * The logical length is zero. + *) + +val contents : t -> string + (* Returns the contents of the buffer as fresh string. *) + +val sub : t -> pos:int -> len:int -> string + (* sub nb k n: returns the n characters starting at position k from + * netbuffer nb as fresh string + *) + +val length : t -> int + (* Returns the logical length of the buffer *) + +val add_string : t -> string -> unit + (* add_string nb s: Adds a copy of the string s to the logical end of + * the netbuffer nb. If necessary, the nb grows. + *) + +val add_sub_string : t -> string -> pos:int -> len:int -> unit + (* add_sub_string nb s k n: Adds the substring of s starting at position + * k with length n to the logical end of the netbuffer nb.
If necessary, + * the nb grows. + * This is semantically the same as + * add_string nb (String.sub s k n), but the extra copy is avoided. + *) + +val delete : t -> pos:int -> len:int -> unit + (* delete nb k n: Deletes the n bytes at position k of netbuffer nb + * in-place. + * The netbuffer does not shrink! + *) + +val clear : t -> unit + (* deletes all contents from the buffer. As 'delete', the netbuffer does + * not shrink. + *) + +val try_shrinking : t -> unit + (* try_shrinking nb: If the length of the buffer is less than half of + * the allocated space, the netbuffer is reallocated in order to save + * memory. + *) + +val index_from : t -> int -> char -> int + (* index_from nb k c: Searches the character c in the netbuffer beginning + * at position k. If found, the position of the left-most occurrence is + * returned. Otherwise, Not_found is raised. + *) + +val unsafe_buffer : t -> string + (* WARNING! This is a low-level function! + * Returns the current string that internally holds the buffer. + * The byte positions 0 to length - 1 actually store the contents of + * the buffer. You can directly read and modify the buffer. Note that + * there is no protection if you read or write positions beyond the + * length of the buffer. + *) + +val print_buffer : t -> unit + (* For the toploop *) + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.3 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.2 2000/06/24 20:20:33 gerd + * Added the toploop printer. + * + * Revision 1.1 2000/04/15 13:07:48 gerd + * Initial revision.
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netconversion.ml b/helm/DEVEL/pxp/netstring/netconversion.ml new file mode 100644 index 000000000..e740654ad --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netconversion.ml @@ -0,0 +1,864 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +exception Malformed_code + + +type encoding = + [ `Enc_utf8 (* UTF-8 *) + | `Enc_java + | `Enc_utf16 (* UTF-16 with unspecified endianess (restricted usage) *) + | `Enc_utf16_le (* UTF-16 little endian *) + | `Enc_utf16_be (* UTF-16 big endian *) + | `Enc_usascii (* US-ASCII (only 7 bit) *) + | `Enc_iso88591 (* ISO-8859-1 *) + | `Enc_iso88592 (* ISO-8859-2 *) + | `Enc_iso88593 (* ISO-8859-3 *) + | `Enc_iso88594 (* ISO-8859-4 *) + | `Enc_iso88595 (* ISO-8859-5 *) + | `Enc_iso88596 (* ISO-8859-6 *) + | `Enc_iso88597 (* ISO-8859-7 *) + | `Enc_iso88598 (* ISO-8859-8 *) + | `Enc_iso88599 (* ISO-8859-9 *) + | `Enc_iso885910 (* ISO-8859-10 *) + | `Enc_iso885913 (* ISO-8859-13 *) + | `Enc_iso885914 (* ISO-8859-14 *) + | `Enc_iso885915 (* ISO-8859-15 *) + | `Enc_koi8r (* KOI8-R *) + | `Enc_jis0201 (* JIS-0201 *) + (* Microsoft: *) + | `Enc_windows1250 (* WINDOWS-1250 *) + | `Enc_windows1251 (* WINDOWS-1251 *) + | `Enc_windows1252 (* WINDOWS-1252 *) + | `Enc_windows1253 (* WINDOWS-1253 *) + | `Enc_windows1254 (* WINDOWS-1254 *) + | `Enc_windows1255 (* WINDOWS-1255 *) + | `Enc_windows1256 (* WINDOWS-1256 *) + | `Enc_windows1257 (* WINDOWS-1257 *) + | `Enc_windows1258 (* WINDOWS-1258 *) + (* IBM, ASCII-based: *) + | `Enc_cp437 + | `Enc_cp737 + | `Enc_cp775 + | `Enc_cp850 + | `Enc_cp852 + | `Enc_cp855 + | `Enc_cp856 + | `Enc_cp857 + | `Enc_cp860 + | `Enc_cp861 + | `Enc_cp862 + | `Enc_cp863 + | `Enc_cp864 + | `Enc_cp865 + | `Enc_cp866 + | `Enc_cp869 + | `Enc_cp874 + | `Enc_cp1006 + (* IBM, EBCDIC-based: *) + | `Enc_cp037 + | `Enc_cp424 + | `Enc_cp500 + | `Enc_cp875 + | `Enc_cp1026 + (* Adobe: *) + | `Enc_adobe_standard_encoding + | 
`Enc_adobe_symbol_encoding + | `Enc_adobe_zapf_dingbats_encoding + (* Apple: *) + | `Enc_macroman + + ] +;; + + +let norm_enc_name e = + (* Removes some characters from e; uppercase *) + let e' = String.create (String.length e) in + let rec next i j = + if i < String.length e then + match e.[i] with + ('-'|'_'|'.') -> next (i+1) j + | c -> e'.[j] <- c; next (i+1) (j+1) + else + j + in + let l = next 0 0 in + String.uppercase(String.sub e' 0 l) +;; + + +let encoding_of_string e = + match norm_enc_name e with + ("UTF16"|"UCS2"|"ISO10646UCS2") -> `Enc_utf16 + | "UTF16BE" -> `Enc_utf16_be + | "UTF16LE" -> `Enc_utf16_le + | "UTF8" -> `Enc_utf8 + | ("UTF8JAVA"|"JAVA") -> `Enc_java + | ("USASCII"|"ASCII"|"ISO646US"|"IBM367"|"CP367") -> `Enc_usascii + | ("ISO88591"|"LATIN1"|"IBM819"|"CP819") -> `Enc_iso88591 + | ("ISO88592"|"LATIN2") -> `Enc_iso88592 + | ("ISO88593"|"LATIN3") -> `Enc_iso88593 + | ("ISO88594"|"LATIN4") -> `Enc_iso88594 + | ("ISO88595"|"CYRILLIC") -> `Enc_iso88595 + | ("ISO88596"|"ARABIC"|"ECMA114"|"ASMO708") -> `Enc_iso88596 + | ("ISO88597"|"GREEK"|"GREEK8"|"ELOT928"|"ECMA118") -> `Enc_iso88597 + | ("ISO88598"|"HEBREW") -> `Enc_iso88598 + | ("ISO88599"|"LATIN5") -> `Enc_iso88599 + | ("ISO885910"|"LATIN6") -> `Enc_iso885910 + | "ISO885913" -> `Enc_iso885913 + | "ISO885914" -> `Enc_iso885914 + | "ISO885915" -> `Enc_iso885915 + | "KOI8R" -> `Enc_koi8r + | ("JIS0201"|"JISX0201"|"X0201") -> `Enc_jis0201 + + | "WINDOWS1250" -> `Enc_windows1250 + | "WINDOWS1251" -> `Enc_windows1251 + | "WINDOWS1252" -> `Enc_windows1252 + | "WINDOWS1253" -> `Enc_windows1253 + | "WINDOWS1254" -> `Enc_windows1254 + | "WINDOWS1255" -> `Enc_windows1255 + | "WINDOWS1256" -> `Enc_windows1256 + | "WINDOWS1257" -> `Enc_windows1257 + | "WINDOWS1258" -> `Enc_windows1258 + + | ("CP437"|"IBM437") -> `Enc_cp437 + | ("CP737"|"IBM737") -> `Enc_cp737 + | ("CP775"|"IBM775") -> `Enc_cp775 + | ("CP850"|"IBM850") -> `Enc_cp850 + | ("CP852"|"IBM852") -> `Enc_cp852 + | ("CP855"|"IBM855") -> `Enc_cp855 + 
| ("CP856"|"IBM856") -> `Enc_cp856 + | ("CP857"|"IBM857") -> `Enc_cp857 + | ("CP860"|"IBM860") -> `Enc_cp860 + | ("CP861"|"IBM861") -> `Enc_cp861 + | ("CP862"|"IBM862") -> `Enc_cp862 + | ("CP863"|"IBM863") -> `Enc_cp863 + | ("CP864"|"IBM864") -> `Enc_cp864 + | ("CP865"|"IBM865") -> `Enc_cp865 + | ("CP866"|"IBM866") -> `Enc_cp866 + | ("CP869"|"IBM869") -> `Enc_cp869 + | ("CP874"|"IBM874") -> `Enc_cp874 + | ("CP1006"|"IBM1006") -> `Enc_cp1006 + + | ("CP037"|"IBM037"|"EBCDICCPUS"|"EBCDICCPCA"|"EBCDICCPWT"| + "EBCDICCPNL") -> `Enc_cp037 + | ("CP424"|"IBM424"|"EBCDICCPHE") -> `Enc_cp424 + | ("CP500"|"IBM500"|"EBCDICCPBE"|"EBCDICCPCH") -> `Enc_cp500 + | ("CP875"|"IBM875") -> `Enc_cp875 + | ("CP1026"|"IBM1026") -> `Enc_cp1026 + + | "ADOBESTANDARDENCODING" -> `Enc_adobe_standard_encoding + | "ADOBESYMBOLENCODING" -> `Enc_adobe_symbol_encoding + | "ADOBEZAPFDINGBATSENCODING" -> `Enc_adobe_zapf_dingbats_encoding + + | "MACINTOSH" -> `Enc_macroman + + | _ -> + failwith "Netconversion.encoding_of_string: unknown encoding" +;; + + +let string_of_encoding (e : encoding) = + (* If there is a "preferred MIME name", this name is returned (see IANA). 
*) + match e with + `Enc_utf16 -> "UTF-16" + | `Enc_utf16_be -> "UTF-16-BE" + | `Enc_utf16_le -> "UTF-16-LE" + | `Enc_utf8 -> "UTF-8" + | `Enc_java -> "UTF-8-JAVA" + | `Enc_usascii -> "US-ASCII" + | `Enc_iso88591 -> "ISO-8859-1" + | `Enc_iso88592 -> "ISO-8859-2" + | `Enc_iso88593 -> "ISO-8859-3" + | `Enc_iso88594 -> "ISO-8859-4" + | `Enc_iso88595 -> "ISO-8859-5" + | `Enc_iso88596 -> "ISO-8859-6" + | `Enc_iso88597 -> "ISO-8859-7" + | `Enc_iso88598 -> "ISO-8859-8" + | `Enc_iso88599 -> "ISO-8859-9" + | `Enc_iso885910 -> "ISO-8859-10" + | `Enc_iso885913 -> "ISO-8859-13" + | `Enc_iso885914 -> "ISO-8859-14" + | `Enc_iso885915 -> "ISO-8859-15" + | `Enc_koi8r -> "KOI8-R" + | `Enc_jis0201 -> "JIS_X0201" + | `Enc_windows1250 -> "WINDOWS-1250" + | `Enc_windows1251 -> "WINDOWS-1251" + | `Enc_windows1252 -> "WINDOWS-1252" + | `Enc_windows1253 -> "WINDOWS-1253" + | `Enc_windows1254 -> "WINDOWS-1254" + | `Enc_windows1255 -> "WINDOWS-1255" + | `Enc_windows1256 -> "WINDOWS-1256" + | `Enc_windows1257 -> "WINDOWS-1257" + | `Enc_windows1258 -> "WINDOWS-1258" + | `Enc_cp437 -> "CP437" + | `Enc_cp737 -> "CP737" + | `Enc_cp775 -> "CP775" + | `Enc_cp850 -> "CP850" + | `Enc_cp852 -> "CP852" + | `Enc_cp855 -> "CP855" + | `Enc_cp856 -> "CP856" + | `Enc_cp857 -> "CP857" + | `Enc_cp860 -> "CP860" + | `Enc_cp861 -> "CP861" + | `Enc_cp862 -> "CP862" + | `Enc_cp863 -> "CP863" + | `Enc_cp864 -> "CP864" + | `Enc_cp865 -> "CP865" + | `Enc_cp866 -> "CP866" + | `Enc_cp869 -> "CP869" + | `Enc_cp874 -> "CP874" + | `Enc_cp1006 -> "CP1006" + | `Enc_cp037 -> "CP037" + | `Enc_cp424 -> "CP424" + | `Enc_cp500 -> "CP500" + | `Enc_cp875 -> "CP875" + | `Enc_cp1026 -> "CP1026" + | `Enc_adobe_standard_encoding -> "ADOBE-STANDARD-ENCODING" + | `Enc_adobe_symbol_encoding -> "ADOBE-SYMBOL-ENCODING" + | `Enc_adobe_zapf_dingbats_encoding -> "ADOBE-ZAPF-DINGBATS-ENCODING" + | `Enc_macroman -> "MACINTOSH" +;; + + +let read_iso88591 write s_in p_in l_in = + let rec scan k_in k_out c_out = + if k_in < l_in then begin + let 
let read_utf8 is_java write s_in p_in l_in =
  (* Decodes the l_in bytes of s_in starting at position p_in as UTF-8
   * (is_java = false) or as the Java variant of UTF-8 (is_java = true).
   *
   * For every decoded code point p, 'write p k_out c_out' is called, where
   * k_out is the sum of the byte counts returned by the previous calls
   * and c_out is the number of previous calls. 'write' returns the number
   * of bytes it has written, or a negative number to stop the scan.
   *
   * Returns (k_in, k_out, enc): the number of input bytes consumed, the
   * accumulated output byte count, and the encoding variant that was
   * decoded. The scan stops without error in front of a multi-byte
   * sequence that is cut off by the end of the input.
   *
   * Raises Malformed_code for byte sequences that are not legal in the
   * selected variant.
   *
   * Java variant: the code point 0 is represented by the two bytes
   * 0xC0 0x80 (which is what write_utf8 emits when is_java is set), and
   * a plain 0x00 byte is rejected.
   *)

  (* Report the variant that was really decoded, so that a caller that
   * continues with the returned encoding keeps applying the Java rules.
   *)
  let enc = if is_java then `Enc_java else `Enc_utf8 in

  let check_continuation n =
    (* Continuation bytes have the form 10xxxxxx *)
    if n < 128 || n > 191 then raise Malformed_code
  in

  let rec scan k_in k_out c_out =
    if k_in < l_in then begin
      let n_out, n_in =
        match s_in.[p_in + k_in] with
            '\000' ->
              if is_java then raise Malformed_code;
              write 0 k_out c_out, 1
          | ('\001'..'\127' as c) ->
              write (Char.code c) k_out c_out, 1
          | ('\192'..'\223' as c) ->
              (* Two-byte sequence. The bytes 0x80 to 0xBF are continuation
               * bytes and must not start a sequence; they fall through
               * to the last case.
               *)
              if k_in + 1 >= l_in then
                -1, 0
              else begin
                let n1 = Char.code c in
                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
                if is_java && n1 = 0xc0 && n2 = 0x80 then
                  (* The Java representation of the code point 0 *)
                  write 0 k_out c_out, 2
                else begin
                  check_continuation n2;
                  let p = ((n1 land 0b11111) lsl 6) lor (n2 land 0b111111) in
                  (* Reject overlong encodings: *)
                  if p < 128 then raise Malformed_code;
                  write p k_out c_out, 2
                end
              end
          | ('\224'..'\239' as c) ->
              if k_in + 2 >= l_in then
                -1, 0
              else begin
                let n1 = Char.code c in
                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
                let n3 = Char.code (s_in.[p_in + k_in + 2]) in
                check_continuation n2;
                check_continuation n3;
                let p =
                  ((n1 land 0b1111) lsl 12) lor
                  ((n2 land 0b111111) lsl 6) lor
                  (n3 land 0b111111)
                in
                if p < 0x800 then raise Malformed_code;
                if (p >= 0xd800 && p < 0xe000) then
                  (* Surrogate pairs are not supported in UTF-8 *)
                  raise Malformed_code;
                if (p >= 0xfffe && p <= 0xffff) then
                  raise Malformed_code;
                write p k_out c_out, 3
              end
          | ('\240'..'\247' as c) ->
              if k_in + 3 >= l_in then
                -1, 0
              else begin
                let n1 = Char.code c in
                let n2 = Char.code (s_in.[p_in + k_in + 1]) in
                let n3 = Char.code (s_in.[p_in + k_in + 2]) in
                let n4 = Char.code (s_in.[p_in + k_in + 3]) in
                check_continuation n2;
                check_continuation n3;
                check_continuation n4;
                let p = ((n1 land 0b111) lsl 18) lor
                        ((n2 land 0b111111) lsl 12) lor
                        ((n3 land 0b111111) lsl 6) lor
                        (n4 land 0b111111)
                in
                if p < 0x10000 then raise Malformed_code;
                if p >= 0x110000 then
                  (* These code points are not supported. *)
                  raise Malformed_code;
                write p k_out c_out, 4
              end
          | _ ->
              (* Stray continuation byte, or outside the valid range of
               * XML characters
               *)
              raise Malformed_code
      in
      (* n_out: number of written bytes; -1 means out buf is full
       * n_in: number of read bytes; 0 means end of in buf reached
       * n_in = 0 implies n_out = -1
       *)
      if n_out < 0 then
        k_in, k_out, enc
      else
        scan (k_in + n_in) (k_out + n_out) (c_out + 1)
    end
    else
      k_in, k_out, enc
  in
  scan 0 0 0
;;
*) + if k_in + 3 < l_in then begin + if p <= 0xdbff then begin + let q = (Char.code s_in.[p_in + k_in + 2 ]) lor + ((Char.code s_in.[p_in + k_in + 3]) lsl 8) in + if q < 0xdc00 or q > 0xdfff then raise Malformed_code; + let eff_p = (p lsl 10) + q + surrogate_offset in + let n = write eff_p k_out c_out in + if n < 0 then + k_in, k_out, `Enc_utf16_le + else + scan (k_in + 4) (k_out + n) (c_out + 1) + end + else + (* Malformed pair: *) + raise Malformed_code; + end + else + (* Incomplete pair: *) + k_in, k_out, `Enc_utf16_le + end + + else + if p = 0xfffe then + (* Big endian byte order mark: It is illegal here *) + raise Malformed_code + else begin + (* A regular code point *) + let n = write p k_out c_out in + if n < 0 then + k_in, k_out, `Enc_utf16_le + else + scan (k_in + 2) (k_out + n) (c_out + 1) + end + end + else + (* Incomplete character: *) + k_in, k_out, `Enc_utf16_le + in + scan k_in_0 0 0 +;; + + +let read_utf16_be k_in_0 write s_in p_in l_in = + let rec scan k_in k_out c_out = + if k_in + 1 < l_in then begin + let p = (Char.code s_in.[p_in + k_in + 1]) lor ((Char.code s_in.[p_in + k_in]) lsl 8) in + + if p >= 0xd800 & p < 0xe000 then begin + (* This is a surrogate pair. 
*) + if k_in + 3 < l_in then begin + if p <= 0xdbff then begin + let q = (Char.code s_in.[p_in + k_in + 3 ]) lor + ((Char.code s_in.[p_in + k_in + 2]) lsl 8) in + if q < 0xdc00 or q > 0xdfff then raise Malformed_code; + let eff_p = (p lsl 10) + q + surrogate_offset in + let n = write eff_p k_out c_out in + if n < 0 then + k_in, k_out, `Enc_utf16_be + else + scan (k_in + 4) (k_out + n) (c_out + 1) + end + else + (* Malformed pair: *) + raise Malformed_code; + end + else + (* Incomplete pair: *) + k_in, k_out, `Enc_utf16_be + end + + else + if p = 0xfffe then + (* Little endian byte order mark: It is illegal here *) + raise Malformed_code + else begin + (* A regular code point *) + let n = write p k_out c_out in + if n < 0 then + k_in, k_out, `Enc_utf16_be + else + scan (k_in + 2) (k_out + n) (c_out + 1) + end + + end + else + (* Incomplete character: *) + k_in, k_out, `Enc_utf16_be + in + scan k_in_0 0 0 +;; + + +let read_utf16 write s_in p_in l_in = + (* Expect a BOM at the beginning of the text *) + if l_in >= 2 then begin + let c0 = s_in.[p_in + 0] in + let c1 = s_in.[p_in + 1] in + if c0 = '\254' & c1 = '\255' then begin + (* 0xfeff as big endian *) + read_utf16_be 2 write s_in p_in l_in + end + else + if c0 = '\255' & c1 = '\254' then begin + (* 0xfeff as little endian *) + read_utf16_le 2 write s_in p_in l_in + end + else + (* byte order mark missing *) + raise Malformed_code + end + else + 0, 0, `Enc_utf16 +;; + + +let write_iso88591 s_out p_out l_out max_chars w p k_out c_out = + if k_out < l_out && c_out < max_chars then begin + if p > 255 then begin + let subst = w p in + let l_subst = String.length subst in + if k_out + l_subst <= l_out then begin + (* Enough space to store 'subst': *) + String.blit subst 0 s_out (k_out+p_out) l_subst; + l_subst + end + else + (* Not enough space: Stop this round of recoding *) + -1 + end + else begin + s_out.[p_out + k_out] <- Char.chr p; + 1 + end + end + else + -1 (* End-of-buffer indicator *) +;; + + +let 
write_usascii s_out p_out l_out max_chars w p k_out c_out = + if k_out < l_out && c_out < max_chars then begin + if p > 127 then begin + let subst = w p in + let l_subst = String.length subst in + if k_out + l_subst <= l_out then begin + (* Enough space to store 'subst': *) + String.blit subst 0 s_out (k_out+p_out) l_subst; + l_subst + end + else + (* Not enough space: Stop this round of recoding *) + -1 + end + else begin + s_out.[p_out + k_out] <- Char.chr p; + 1 + end + end + else + -1 (* End-of-buffer indicator *) +;; + + +let write_8bit from_unicode s_out p_out l_out max_chars w p k_out c_out = + if k_out < l_out && c_out < max_chars then begin + let p' = + match Array.unsafe_get from_unicode (p land 255) with + Netmappings.U_nil -> -1 + | Netmappings.U_single (p0,q0) -> + if p0 = p then q0 else -1 + | Netmappings.U_list l -> + (try List.assoc p l with Not_found -> -1) + in + if p' < 0 then begin + let subst = w p in + let l_subst = String.length subst in + if k_out + l_subst <= l_out then begin + (* Enough space to store 'subst': *) + String.blit subst 0 s_out (k_out+p_out) l_subst; + l_subst + end + else + (* Not enough space: Stop this round of recoding *) + -1 + end + else begin + s_out.[p_out + k_out] <- Char.chr p'; + 1 + end + end + else + -1 (* End-of-buffer indicator *) +;; + + +let write_utf8 is_java s_out p_out l_out max_chars w p k_out c_out = + if p <= 127 && (not is_java || p <> 0) then begin + if k_out < l_out && c_out < max_chars then begin + s_out.[p_out + k_out] <- Char.chr p; + 1 + end + else -1 + end + else if p <= 0x7ff then begin + if k_out + 1 < l_out && c_out < max_chars then begin + s_out.[p_out + k_out] <- Char.chr (0xc0 lor (p lsr 6)); + s_out.[p_out + k_out + 1] <- Char.chr (0x80 lor (p land 0x3f)); + 2 + end + else -1 + end + else if p <= 0xffff then begin + (* Refuse writing surrogate pairs, and fffe, ffff *) + if (p >= 0xd800 & p < 0xe000) or (p >= 0xfffe) then + failwith "Netconversion.write_utf8"; + if k_out + 2 < l_out && 
let utf16_surrogates p =
  (* Splits the code point p (0x10000 <= p <= 0x10ffff) into its UTF-16
   * surrogate pair (high, low).
   * The offset 0x10000 must be subtracted before the upper ten bits are
   * taken. Otherwise the high surrogate is too large by 0x40, the pair
   * does not decode to p again in read_utf16_le/read_utf16_be, and for
   * large p the value even leaves the high surrogate range.
   *)
  let p' = p - 0x10000 in
  ((p' lsr 10) + 0xd800, (p' land 0x3ff) + 0xdc00)
;;


let write_utf16_le s_out p_out l_out max_chars w p k_out c_out =
  (* Writes the code point p as UTF-16 (little endian) into s_out at
   * position p_out + k_out. l_out is the length of the output window
   * beginning at p_out, and c_out is the number of characters already
   * written.
   * Returns the number of bytes written (2, or 4 for a surrogate pair),
   * or -1 if the window has not enough space left or if max_chars
   * characters have already been written.
   * Fails for 0xfffe, 0xffff, and for code points above 0x10ffff.
   * The substitution function w is never called here; the parameter exists
   * so that all writers take the same arguments.
   *)
  if p >= 0xfffe then begin
    if p <= 0xffff || p > 0x10ffff then failwith "Netconversion.write_utf16_le";
    (* Must be written as surrogate pair *)
    if k_out + 3 < l_out && c_out < max_chars then begin
      let high, low = utf16_surrogates p in
      s_out.[p_out + k_out    ] <- Char.chr (high land 0xff);
      s_out.[p_out + k_out + 1] <- Char.chr (high lsr 8);
      s_out.[p_out + k_out + 2] <- Char.chr (low land 0xff);
      s_out.[p_out + k_out + 3] <- Char.chr (low lsr 8);
      4
    end
    else -1
  end
  else begin
    if k_out + 1 < l_out && c_out < max_chars then begin
      s_out.[p_out + k_out    ] <- Char.chr (p land 0xff);
      s_out.[p_out + k_out + 1] <- Char.chr (p lsr 8);
      2
    end
    else
      -1
  end
;;


let write_utf16_be s_out p_out l_out max_chars w p k_out c_out =
  (* The big endian counterpart of write_utf16_le: same arguments, same
   * results, and same failure conditions, but the more significant byte
   * of every 16 bit unit is stored first.
   *)
  if p >= 0xfffe then begin
    if p <= 0xffff || p > 0x10ffff then failwith "Netconversion.write_utf16_be";
    (* Must be written as surrogate pair *)
    if k_out + 3 < l_out && c_out < max_chars then begin
      let high, low = utf16_surrogates p in
      s_out.[p_out + k_out    ] <- Char.chr (high lsr 8);
      s_out.[p_out + k_out + 1] <- Char.chr (high land 0xff);
      s_out.[p_out + k_out + 2] <- Char.chr (low lsr 8);
      s_out.[p_out + k_out + 3] <- Char.chr (low land 0xff);
      4
    end
    else -1
  end
  else begin
    if k_out + 1 < l_out && c_out < max_chars then begin
      s_out.[p_out + k_out    ] <- Char.chr (p lsr 8);
      s_out.[p_out + k_out + 1] <- Char.chr (p land 0xff);
      2
    end
    else
      -1
  end
;;
let recode_string ~in_enc ~out_enc ?(subst = (fun _ -> raise Not_found)) s =
  (* Recodes the complete string s from in_enc to out_enc, and returns the
   * result as a new string.
   * subst is passed to 'recode' unchanged; by default it raises
   * Not_found.
   *
   * Raises Malformed_code if 'recode' consumes no input although a part
   * of s is still unprocessed, for example because s ends with a
   * multi-byte character that is cut off. As every round calls 'recode'
   * with the same empty output buffer, such a round would be repeated
   * forever.
   *)

  let length = String.length s in
  let size = 1024 in
  let out_buf = String.create size in

  let rec recode_loop k s_done in_enc =
    (* 'k' bytes of 's' have already been processed, and the result is in
     * 's_done' (as list of chunks in reverse order).
     *)
    (* Recode to 'out_buf': *)
    let in_len = length - k in
    let in_done, out_done, in_enc' =
      recode ~in_enc:in_enc ~in_buf:s ~in_pos:k ~in_len:in_len
             ~out_enc:out_enc ~out_buf:out_buf ~out_pos:0 ~out_len:size
             ~max_chars:size ~subst:subst in
    (* Collect the results: *)
    let k' = k + in_done in
    let s_done' = String.sub out_buf 0 out_done :: s_done in
    (* Still something to do? *)
    if k' >= length then
      (* No: Concatenate s_done' to get the final result. *)
      String.concat "" (List.rev s_done')
    else if in_done = 0 then
      (* Input is left, but this round made no progress. The next round
       * would be identical, so stop here instead of looping.
       *)
      raise Malformed_code
    else
      recode_loop k' s_done' in_enc'
  in

  recode_loop 0 [] in_enc
;;
+ * + * Revision 1.1 2000/05/20 20:30:50 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netconversion.mli b/helm/DEVEL/pxp/netstring/netconversion.mli new file mode 100644 index 000000000..5e3e4b4e1 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netconversion.mli @@ -0,0 +1,241 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +exception Malformed_code + +(* Encodings: + * - With the exception of UTF-8 and UTF-16, only single-byte character sets + * are supported. + * - I took the mappings from www.unicode.org, and the standard names of + * the character sets from IANA. Obviously, many character sets are missing + * that can be supported; especially ISO646 character sets, many EBCDIC + * code pages. + * - Because of the copyright statement from Unicode, I cannot put the + * source tables that describe the mappings into the distribution. They + * are publicly available from www.unicode.org. + * - Because of this, it is difficult for you to extend the list of character + * sets; you need the source tables I am not allowed to distribute. + * These tables have a very simple format: Every line describes a pair + * of code points; the left code (<= 0xff) is the code in the character + * set, the right code (<= 0xffff) is the Unicode equivalent. + * For an example, see + * http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT + * You can send me such files, and I will integrate them into the + * distribution (if possible). + * - I really do not know very much about the character sets used in + * East Asia. If you need them, please write the necessary conversion + * functions and send them to me. + * + * KNOWN PROBLEMS: + * - The following charsets do not have a bijective mapping to Unicode: + * adobe_standard_encoding, adobe_symbol_encoding, + * adobe_zapf_dingbats_encoding, cp1002 (0xFEBE). 
The current implementation + * simply removes one of the conflicting code point pairs - this might + * not be what you want. + *) + +type encoding = + [ `Enc_utf8 (* UTF-8 *) + | `Enc_java (* The variant of UTF-8 used by Java *) + | `Enc_utf16 (* UTF-16 with unspecified endianness (restricted usage) *) + | `Enc_utf16_le (* UTF-16 little endian *) + | `Enc_utf16_be (* UTF-16 big endian *) + | `Enc_usascii (* US-ASCII (only 7 bit) *) + | `Enc_iso88591 (* ISO-8859-1 *) + | `Enc_iso88592 (* ISO-8859-2 *) + | `Enc_iso88593 (* ISO-8859-3 *) + | `Enc_iso88594 (* ISO-8859-4 *) + | `Enc_iso88595 (* ISO-8859-5 *) + | `Enc_iso88596 (* ISO-8859-6 *) + | `Enc_iso88597 (* ISO-8859-7 *) + | `Enc_iso88598 (* ISO-8859-8 *) + | `Enc_iso88599 (* ISO-8859-9 *) + | `Enc_iso885910 (* ISO-8859-10 *) + | `Enc_iso885913 (* ISO-8859-13 *) + | `Enc_iso885914 (* ISO-8859-14 *) + | `Enc_iso885915 (* ISO-8859-15 *) + | `Enc_koi8r (* KOI8-R *) + | `Enc_jis0201 (* JIS-0201 *) + (* Microsoft: *) + | `Enc_windows1250 (* WINDOWS-1250 *) + | `Enc_windows1251 (* WINDOWS-1251 *) + | `Enc_windows1252 (* WINDOWS-1252 *) + | `Enc_windows1253 (* WINDOWS-1253 *) + | `Enc_windows1254 (* WINDOWS-1254 *) + | `Enc_windows1255 (* WINDOWS-1255 *) + | `Enc_windows1256 (* WINDOWS-1256 *) + | `Enc_windows1257 (* WINDOWS-1257 *) + | `Enc_windows1258 (* WINDOWS-1258 *) + (* IBM, ASCII-based: *) + | `Enc_cp437 + | `Enc_cp737 + | `Enc_cp775 + | `Enc_cp850 + | `Enc_cp852 + | `Enc_cp855 + | `Enc_cp856 + | `Enc_cp857 + | `Enc_cp860 + | `Enc_cp861 + | `Enc_cp862 + | `Enc_cp863 + | `Enc_cp864 + | `Enc_cp865 + | `Enc_cp866 + | `Enc_cp869 + | `Enc_cp874 + | `Enc_cp1006 + (* IBM, EBCDIC-based: *) + | `Enc_cp037 + | `Enc_cp424 + | `Enc_cp500 + | `Enc_cp875 + | `Enc_cp1026 + (* Adobe: *) + | `Enc_adobe_standard_encoding + | `Enc_adobe_symbol_encoding + | `Enc_adobe_zapf_dingbats_encoding + (* Apple: *) + | `Enc_macroman + + ] + + +val encoding_of_string : string -> encoding;; + (* Returns the encoding denoted by the given name.
Fails if the + * encoding is unknown. + * E.g. encoding_of_string "iso-8859-1" = `Enc_iso88591 + *) + +val string_of_encoding : encoding -> string;; + (* Returns the name of the encoding. *) + + +val makechar : encoding -> int -> string + (* makechar enc i: + * Creates the string representing the code point i in encoding enc. + * Raises Not_found if the character is legal but cannot be represented + * in enc. + * + * Possible encodings: everything but `Enc_utf16. + *) + +val recode : in_enc:encoding -> + in_buf:string -> + in_pos:int -> + in_len:int -> + out_enc:encoding -> + out_buf:string -> + out_pos:int -> + out_len:int -> + max_chars:int -> + subst:(int -> string) -> (int * int * encoding) + (* + * let (in_n, out_n, in_enc') = + * recode in_enc in_buf in_len out_enc out_buf out_pos out_len max_chars + * subst: + * Converts the character sequence contained in the at most in_len bytes + * of in_buf starting at position in_pos, and writes the result + * into at most out_len bytes of out_buf starting at out_pos. + * At most max_chars are written into out_buf. + * The characters in in_buf are assumed to be encoded as in_enc, and the + * characters in out_buf will be encoded as out_enc. + * If there is a code point which cannot be represented in out_enc, + * the function subst is called with the code point as argument, and the + * resulting string (which must already be encoded as out_enc) is + * inserted instead. + * Note: It is possible that subst is called several times for the same + * character. + * Return value: out_n is the actual number of bytes written into out_buf. + * in_n is the actual number of bytes that have been converted from + * in_buf; in_n may be smaller than in_len because of incomplete + * multi-byte characters, or because the output buffer has less space + * for characters than the input buffer, or because of a change + * of the encoding variant. 
+ * If there is at least one complete character in in_buf, and at least + * space for one complete character in out_buf, and max_chars >= 1, it is + * guaranteed that in_n > 0 or out_n > 0. + * in_enc' is normally identical to in_enc. However, there are cases + * in which the encoding can be refined when looking at the byte + * sequence; for example whether a little endian or big endian variant + * of the encoding is used. in_enc' is the variant of in_enc that was + * used for the last character that has been converted. + * + * NOTES: + * + * Supported range of code points: 0 to 0xd7ff, 0xe000 to 0xfffd, + * 0x10000 to 0x10ffff. + * + * Enc_utf8: Malformed UTF-8 byte sequences are always rejected. This + * is also true for the sequence 0xc0 0x80 which is used by some software + * (Java) as paraphrase for the code point 0. + * + * Enc_utf16: When reading from a string encoded as Enc_utf16, a byte + * order mark is expected at the beginning. The detected variant + * (Enc_utf16_le or Enc_utf16_be) is returned. The byte order mark is + * not included into the output string. - It is not possible to + * write as Enc_utf16. + * + * Enc_utf16_le, Enc_utf16_be: When reading from such a string, the + * code point 0xfeff is returned as it is; it is a "zero-width + * non-breaking space". The code point 0xfffe is rejected. + * + * Surrogate pairs: These are recognized (or written) only for a + * UTF-16 encoding; and rejected for any other encoding. + * + * Rejected byte sequences cause the exception Bad_character_stream. + *) + +val recode_string : in_enc:encoding -> + out_enc:encoding -> + ?subst:(int -> string) -> + string -> + string + (* Recodes a complete string from in_enc to out_enc, and returns it. + * The function subst is invoked for code points of in_enc that cannot + * be represented in out_enc, and the result of the function invocation + * is substituted. + * If subst is missing, Not_found is raised in this case. 
+ *) + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:28 lpadovan + * Initial revision + * + * Revision 1.1 2000/08/13 00:02:57 gerd + * Initial revision. + * + * + * ====================================================================== + * OLD LOGS FROM THE PXP PACKAGE (FILE NAME pxp_encoding.mli): + * + * Revision 1.4 2000/07/04 22:05:58 gerd + * Enhanced version of 'recode'. Labeled arguments. + * New function 'recode_string'. + * + * Revision 1.3 2000/05/29 23:48:38 gerd + * Changed module names: + * Markup_aux into Pxp_aux + * Markup_codewriter into Pxp_codewriter + * Markup_document into Pxp_document + * Markup_dtd into Pxp_dtd + * Markup_entity into Pxp_entity + * Markup_lexer_types into Pxp_lexer_types + * Markup_reader into Pxp_reader + * Markup_types into Pxp_types + * Markup_yacc into Pxp_yacc + * See directory "compatibility" for (almost) compatible wrappers emulating + * Markup_document, Markup_dtd, Markup_reader, Markup_types, and Markup_yacc. + * + * Revision 1.2 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.1 2000/05/20 20:30:50 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netencoding.ml b/helm/DEVEL/pxp/netstring/netencoding.ml new file mode 100644 index 000000000..e87c4c397 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netencoding.ml @@ -0,0 +1,903 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +module Str = Netstring_str;; + +module Base64 = struct + let b64_pattern plus slash = + [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M'; + 'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z'; + 'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm'; + 'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z'; + '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];; + + + let rfc_pattern = b64_pattern '+' '/';; + let url_pattern = b64_pattern '-' '/';; + + let encode_with_options b64 equal s pos len linelen crlf = + (* encode using "base64". + * 'b64': The encoding table, created by b64_pattern. + * 'equal': The character that should be used instead of '=' in the original + * encoding scheme. Pass '=' to get the original encoding scheme. + * s, pos, len, linelen: See the interface description of encode_substring. 
+ *) + assert (Array.length b64 = 64); + if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then + invalid_arg "Netencoding.Base64.encode_with_options"; + if pos + len > String.length s then + invalid_arg "Netencoding.Base64.encode_with_options"; + + let linelen = + (linelen/4) * 4 in + + let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in + (* l_t: length of the result without additional line endings *) + + let l_t' = + if linelen < 4 then + l_t + else + if l_t = 0 then 0 else + let n_lines = ((l_t - 1) / linelen) + 1 in + l_t + n_lines * (if crlf then 2 else 1) + in + (* l_t': length of the result with CRLF or LF characters *) + + let t = String.make l_t' equal in + let j = ref 0 in + let q = ref 0 in + for k = 0 to len / 3 - 1 do + let p = pos + 3*k in + (* p >= pos >= 0: this is evident + * p+2 < pos+len <= String.length s: + * Because k <= len/3-1 + * 3*k <= 3*(len/3-1) = len - 3 + * pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len + * So it is proved that the following unsafe string accesses always + * work. + *) + let bits = (Char.code (String.unsafe_get s (p)) lsl 16) lor + (Char.code (String.unsafe_get s (p+1)) lsl 8) lor + (Char.code (String.unsafe_get s (p+2))) in + (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *) + assert(!j + 3 < l_t'); + String.unsafe_set t !j (Array.unsafe_get b64 ( bits lsr 18)); + String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63)); + String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr 6) land 63)); + String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits land 63)); + j := !j + 4; + if linelen > 3 then begin + q := !q + 4; + if !q + 4 > linelen then begin + (* The next 4 characters won't fit on the current line. So insert + * a line ending. 
+ *) + if crlf then begin + t.[ !j ] <- '\013'; + t.[ !j+1 ] <- '\010'; + j := !j + 2; + end + else begin + t.[ !j ] <- '\010'; + incr j + end; + q := 0; + end; + end; + done; + (* padding if needed: *) + let m = len mod 3 in + begin + match m with + 0 -> () + | 1 -> + let bits = Char.code (s.[pos + len - 1]) in + t.[ !j ] <- b64.( bits lsr 2); + t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4); + j := !j + 4; + q := !q + 4; + | 2 -> + let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor + (Char.code (s.[pos + len - 1])) in + t.[ !j ] <- b64.( bits lsr 10); + t.[ !j + 1 ] <- b64.((bits lsr 4) land 0x3f); + t.[ !j + 2 ] <- b64.((bits lsl 2) land 0x3f); + j := !j + 4; + q := !q + 4; + | _ -> assert false + end; + + (* If required, add another line end: *) + + if linelen > 3 & !q > 0 then begin + if crlf then begin + t.[ !j ] <- '\013'; + t.[ !j+1 ] <- '\010'; + j := !j + 2; + end + else begin + t.[ !j ] <- '\010'; + incr j + end; + end; + + t ;; + + + + let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_with_options rfc_pattern '=' s pos l linelength crlf;; + + + let encode_substring s ~pos ~len ~linelength ~crlf = + encode_with_options rfc_pattern '=' s pos len linelength crlf;; + + + let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_with_options url_pattern '.' s pos l linelength crlf;; + + + let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces = + if len < 0 or pos < 0 or pos > String.length t then + invalid_arg "Netencoding.Base64.decode_substring"; + if pos + len > String.length t then + invalid_arg "Netencoding.Base64.decode_substring"; + + (* Compute the number of effective characters l_t in 't'; + * pad_chars: number of '=' characters at the end of the string. 
+ *) + let l_t, pad_chars = + if p_spaces then begin + (* Count all non-whitespace characters: *) + let c = ref 0 in + let p = ref 0 in + for i = pos to pos + len - 1 do + match String.unsafe_get t i with + (' '|'\t'|'\r'|'\n') -> () + | ('='|'.') as ch -> + if ch = '.' & not p_url then + invalid_arg "Netencoding.Base64.decode_substring"; + incr c; + incr p; + if !p > 2 then + invalid_arg "Netencoding.Base64.decode_substring"; + for j = i+1 to pos + len - 1 do + match String.unsafe_get t j with + (' '|'\t'|'\r'|'\n'|'.'|'=') -> () + | _ -> + (* Only another '=' or spaces allowed *) + invalid_arg "Netencoding.Base64.decode_substring"; + done + | _ -> incr c + done; + if !c mod 4 <> 0 then + invalid_arg "Netencoding.Base64.decode_substring"; + !c, !p + end + else + len, + ( if len mod 4 <> 0 then + invalid_arg "Netencoding.Base64.decode_substring"; + if len > 0 then ( (* padding sits at the end of the substring, i.e. just before pos+len; here len >= 4 *) + if String.sub t (pos + len - 2) 2 = "==" or + (p_url & String.sub t (pos + len - 2) 2 = "..") then 2 + else + if String.sub t (pos + len - 1) 1 = "=" or + (p_url & String.sub t (pos + len - 1) 1 = ".") then 1 + else + 0 + ) + else 0 + ) + in + + let l_s = (l_t / 4) * 3 - pad_chars in (* sic! *) + let s = String.create l_s in + + let decode_char c = + match c with + 'A' .. 'Z' -> Char.code(c) - 65 (* 65 = Char.code 'A' *) + | 'a' .. 'z' -> Char.code(c) - 71 (* 71 = Char.code 'a' - 26 *) + | '0' .. 
'9' -> Char.code(c) + 4 (* -4 = Char.code '0' - 52 *) + | '+' -> 62 + | '-' -> if not p_url then + invalid_arg "Netencoding.Base64.decode_substring"; + 62 + | '/' -> 63 + | _ -> invalid_arg "Netencoding.Base64.decode_substring"; + in + + (* Decode all but the last quartet: *) + + let cursor = ref pos in + let rec next_char() = + match t.[ !cursor ] with + (' '|'\t'|'\r'|'\n') -> + if p_spaces then (incr cursor; next_char()) + else invalid_arg "Netencoding.Base64.decode_substring" + | c -> + incr cursor; c + in + + if p_spaces then begin + for k = 0 to l_t / 4 - 2 do + let q = 3*k in + let c0 = next_char() in + let c1 = next_char() in + let c2 = next_char() in + let c3 = next_char() in + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + String.unsafe_set s q (Char.chr x0); + String.unsafe_set s (q+1) (Char.chr x1); + String.unsafe_set s (q+2) (Char.chr x2); + done; + end + else begin + (* Much faster: *) + for k = 0 to l_t / 4 - 2 do + let p = pos + 4*k in + let q = 3*k in + let c0 = String.unsafe_get t p in + let c1 = String.unsafe_get t (p + 1) in + let c2 = String.unsafe_get t (p + 2) in + let c3 = String.unsafe_get t (p + 3) in + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + String.unsafe_set s q (Char.chr x0); + String.unsafe_set s (q+1) (Char.chr x1); + String.unsafe_set s (q+2) (Char.chr x2); + done; + cursor := pos + l_t - 4; + end; + + (* Decode the last quartet: *) + + if l_t > 0 then begin + let q = 3*(l_t / 4 - 1) in + let c0 = next_char() in + let c1 = next_char() in + let c2 = next_char() in + let c3 = next_char() in + + if (c2 = '=' & c3 = '=') or 
(p_url & c2 = '.' & c3 = '.') then begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + s.[ q ] <- Char.chr x0; + end + else + if (c3 = '=') or (p_url & c3 = '.') then begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + s.[ q ] <- Char.chr x0; + s.[ q+1 ] <- Char.chr x1; + end + else begin + let n0 = decode_char c0 in + let n1 = decode_char c1 in + let n2 = decode_char c2 in + let n3 = decode_char c3 in + let x0 = (n0 lsl 2) lor (n1 lsr 4) in + let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in + let x2 = ((n2 lsl 6) land 0xc0) lor n3 in + s.[ q ] <- Char.chr x0; + s.[ q+1 ] <- Char.chr x1; + s.[ q+2 ] <- Char.chr x2; + end + + end; + + s ;; + + + + let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l url_variant accept_spaces;; + + let decode_ignore_spaces s = + decode_substring s 0 (String.length s) true true;; + + +end + + + +module QuotedPrintable = struct + + let encode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.QuotedPrintable.encode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.QuotedPrintable.encode_substring"; + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + ('\r'|'\n') -> + count (n+1) (i+1) + | ('\000'..'\031'|'\127'..'\255'| + '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') -> + count (n+3) (i+1) + | ' ' -> + (* Protect spaces only if they occur at the end of a line *) + if i+1 < len then + match s.[pos+i+1] with + ('\r'|'\n') -> + count (n+3) (i+1) + | _ -> + count (n+1) (i+1) + else + count (n+3) (i+1) + | _ -> + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + + let 
hexdigit = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in + + let k = ref 0 in + + let add_quoted c = + t.[ !k ] <- '='; + t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); + t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) + in + + for i = 0 to len - 1 do (* i is relative to pos, exactly as in the sizing pass 'count' *) + match String.unsafe_get s (pos+i) with + ('\r'|'\n') as c -> + String.unsafe_set t !k c; + incr k + | ('\000'..'\031'|'\127'..'\255'| + '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c -> + add_quoted c; + k := !k + 3 + | ' ' -> + (* Protect spaces only if they occur at the end of a line *) + if i+1 < len then + match s.[pos+i+1] with + ('\r'|'\n') -> + add_quoted ' '; + k := !k + 3; + | _ -> + String.unsafe_set t !k ' '; + incr k + else begin + add_quoted ' '; + k := !k + 3; + end + | c -> + String.unsafe_set t !k c; + incr k + done; + + t ;; + + + let encode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_substring s pos l;; + + + + let decode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + + let decode_hex c = + match c with + '0'..'9' -> Char.code c - 48 + | 'A'..'F' -> Char.code c - 55 + | 'a'..'f' -> Char.code c - 87 + | _ -> + invalid_arg "Netencoding.QuotedPrintable.decode_substring"; + in + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + '=' -> + if i+1 = len then + (* A '=' at EOF is ignored *) + count n (i+1) + else + if i+1 < len then + match s.[pos+i+1] with + '\r' -> + (* Official soft break *) + if i+2 < len & s.[pos+i+2] = '\n' then + count n (i+3) + else + count n (i+2) + | '\n' -> + (* Inofficial soft break *) + count n (i+2) + | _ -> + if i+2 >= len then + invalid_arg + "Netencoding.QuotedPrintable.decode_substring"; + let _ = 
decode_hex s.[pos+i+2] in + count (n+1) (i+3) + else + invalid_arg "Netencoding.QuotedPrintable.decode_substring" + | _ -> + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + let k = ref pos in + let e = pos + len in + let i = ref 0 in + + while !i < l do + match String.unsafe_get s !k with + '=' -> + if !k+1 = e then + (* A '=' at EOF is ignored *) + () + else + if !k+1 < e then + match s.[!k+1] with + '\r' -> + (* Official soft break *) + if !k+2 < e & s.[!k+2] = '\n' then + k := !k + 3 + else + k := !k + 2 + | '\n' -> + (* Inofficial soft break *) + k := !k + 2 + | _ -> + if !k+2 >= e then + invalid_arg + "Netencoding.QuotedPrintable.decode_substring"; + let x1 = decode_hex s.[!k+1] in + let x2 = decode_hex s.[!k+2] in + t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); + k := !k + 3; + incr i + else + invalid_arg "Netencoding.QuotedPrintable.decode_substring" + | c -> + String.unsafe_set t !i c; + incr k; + incr i + done; + + t ;; + + + let decode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l;; + +end + + +module Q = struct + + let encode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.Q.encode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.Q.encode_substring"; + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + | ('A'..'Z'|'a'..'z'|'0'..'9') -> + count (n+1) (i+1) + | _ -> + count (n+3) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + + let hexdigit = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in + + let k = ref 0 in + + let add_quoted c = + t.[ !k ] <- '='; + t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 ); + t.[ !k+2 ] <- hexdigit.( Char.code c land 15 ) + in + + for i = 0 to len - 1 do (* i is relative to pos, exactly as in 'count' *) + match String.unsafe_get s (pos+i) with + | ('A'..'Z'|'a'..'z'|'0'..'9') as c -> + 
String.unsafe_set t !k c; + incr k + | c -> + add_quoted c; + k := !k + 3 + done; + + t ;; + + + let encode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + encode_substring s pos l;; + + + + let decode_substring s ~pos ~len = + + if len < 0 or pos < 0 or pos > String.length s then + invalid_arg "Netencoding.Q.decode_substring"; + if pos + len > String.length s then + invalid_arg "Netencoding.Q.decode_substring"; + + let decode_hex c = + match c with + '0'..'9' -> Char.code c - 48 + | 'A'..'F' -> Char.code c - 55 + | 'a'..'f' -> Char.code c - 87 + | _ -> + invalid_arg "Netencoding.Q.decode_substring"; + in + + let rec count n i = + if i < len then + match String.unsafe_get s (pos+i) with + '=' -> + if i+2 >= len then + invalid_arg "Netencoding.Q.decode_substring"; + let _ = decode_hex s.[pos+i+1] in + let _ = decode_hex s.[pos+i+2] in + count (n+1) (i+3) + | _ -> (* including '_' *) + count (n+1) (i+1) + else + n + in + + let l = count 0 0 in + let t = String.create l in + let k = ref pos in + let e = pos + len in + let i = ref 0 in + + while !i < l do + match String.unsafe_get s !k with + '=' -> + if !k+2 >= e then + invalid_arg "Netencoding.Q.decode_substring"; + let x1 = decode_hex s.[!k+1] in + let x2 = decode_hex s.[!k+2] in + t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2); + k := !k + 3; + incr i + | '_' -> + String.unsafe_set t !i ' '; + incr k; + incr i + | c -> + String.unsafe_set t !i c; + incr k; + incr i + done; + + t ;; + + + let decode ?(pos=0) ?len s = + let l = match len with None -> String.length s - pos | Some x -> x in + decode_substring s pos l ;; + +end + + +module Url = struct + let hex_digits = + [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; + '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];; + + let to_hex2 k = + (* Converts k to a 2-digit hex string *) + let s = String.create 2 in + s.[0] <- hex_digits.( (k lsr 4) land 15 ); + s.[1] <- hex_digits.( k land 15 ); + s ;; + + + let of_hex1 c = + match c with + 
('0'..'9') -> Char.code c - Char.code '0' + | ('A'..'F') -> Char.code c - Char.code 'A' + 10 + | ('a'..'f') -> Char.code c - Char.code 'a' + 10 + | _ -> + raise Not_found ;; + + + + let url_encoding_re = + Str.regexp "[^A-Za-z0-9$_.!*'(),-]";; + + let url_decoding_re = + Str.regexp "\\+\\|%..\\|%.\\|%";; + + + let encode s = + Str.global_substitute + url_encoding_re + (fun r _ -> + match Str.matched_string r s with + " " -> "+" + | x -> + let k = Char.code(x.[0]) in + "%" ^ to_hex2 k + ) + s ;; + + + let decode s = + let l = String.length s in + Str.global_substitute + url_decoding_re + (fun r _ -> + match Str.matched_string r s with + | "+" -> " " + | _ -> + let i = Str.match_beginning r in + (* Assertion: s.[i] = '%' *) + if i+2 >= l then failwith "Cgi.decode"; + let c1 = s.[i+1] in + let c2 = s.[i+2] in + begin + try + let k1 = of_hex1 c1 in + let k2 = of_hex1 c2 in + String.make 1 (Char.chr((k1 lsl 4) lor k2)) + with + Not_found -> + failwith "Cgi.decode" + end + ) + s ;; + +end + + +module Html = struct + + let eref_re = + Str.regexp + "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;; + let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;; + + let etable = + [ "lt", "<"; + "gt", ">"; + "amp", "&"; + "quot", "\""; + (* Note: &quot; is new in HTML-4.0, but it has been widely used + * much earlier. 
+ *) + "nbsp", "\160"; + "iexcl", "\161"; + "cent", "\162"; + "pound", "\163"; + "curren", "\164"; + "yen", "\165"; + "brvbar", "\166"; + "sect", "\167"; + "uml", "\168"; + "copy", "\169"; + "ordf", "\170"; + "laquo", "\171"; + "not", "\172"; + "shy", "\173"; + "reg", "\174"; + "macr", "\175"; + "deg", "\176"; + "plusmn", "\177"; + "sup2", "\178"; + "sup3", "\179"; + "acute", "\180"; + "micro", "\181"; + "para", "\182"; + "middot", "\183"; + "cedil", "\184"; + "sup1", "\185"; + "ordm", "\186"; + "raquo", "\187"; + "frac14", "\188"; + "frac12", "\189"; + "frac34", "\190"; + "iquest", "\191"; + "Agrave", "\192"; + "Aacute", "\193"; + "Acirc", "\194"; + "Atilde", "\195"; + "Auml", "\196"; + "Aring", "\197"; + "AElig", "\198"; + "Ccedil", "\199"; + "Egrave", "\200"; + "Eacute", "\201"; + "Ecirc", "\202"; + "Euml", "\203"; + "Igrave", "\204"; + "Iacute", "\205"; + "Icirc", "\206"; + "Iuml", "\207"; + "ETH", "\208"; + "Ntilde", "\209"; + "Ograve", "\210"; + "Oacute", "\211"; + "Ocirc", "\212"; + "Otilde", "\213"; + "Ouml", "\214"; + "times", "\215"; + "Oslash", "\216"; + "Ugrave", "\217"; + "Uacute", "\218"; + "Ucirc", "\219"; + "Uuml", "\220"; + "Yacute", "\221"; + "THORN", "\222"; + "szlig", "\223"; + "agrave", "\224"; + "aacute", "\225"; + "acirc", "\226"; + "atilde", "\227"; + "auml", "\228"; + "aring", "\229"; + "aelig", "\230"; + "ccedil", "\231"; + "egrave", "\232"; + "eacute", "\233"; + "ecirc", "\234"; + "euml", "\235"; + "igrave", "\236"; + "iacute", "\237"; + "icirc", "\238"; + "iuml", "\239"; + "eth", "\240"; + "ntilde", "\241"; + "ograve", "\242"; + "oacute", "\243"; + "ocirc", "\244"; + "otilde", "\245"; + "ouml", "\246"; + "divide", "\247"; + "oslash", "\248"; + "ugrave", "\249"; + "uacute", "\250"; + "ucirc", "\251"; + "uuml", "\252"; + "yacute", "\253"; + "thorn", "\254"; + "yuml", "\255"; + ] ;; + + let quick_etable = + let ht = Hashtbl.create 50 in + List.iter (fun (name,value) -> Hashtbl.add ht name value) etable; + (* Entities to be decoded, but that 
must not be encoded: *) + Hashtbl.add ht "apos" "'"; (* used in XML documents *) + ht ;; + + let rev_etable = + let a = Array.create 256 "" in + List.iter (fun (name,value) -> + a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable; + for i = 0 to 8 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 11 to 12 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 14 to 31 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + for i = 127 to 159 do + a.(i) <- "&#" ^ string_of_int i ^ ";" + done; + a ;; + + let decode_to_latin1 s = + Str.global_substitute + eref_re + (fun r _ -> + let t = Str.matched_string r s in + try + let n = int_of_string(Str.matched_group r 2 s) in + if n < 256 then + String.make 1 (Char.chr n) + else + t + with + Not_found -> + try + let name = Str.matched_group r 3 s in + try + Hashtbl.find quick_etable name + with + Not_found -> + t + with + Not_found -> assert false + ) + s ;; + + let encode_from_latin1 s = + Str.global_substitute + unsafe_re + (fun r _ -> + let t = Str.matched_string r s in + let i = Char.code (t.[0]) in + rev_etable.(i) + ) + s ;; +end + + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.5 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.4 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.3 2000/03/03 17:03:16 gerd + * Q encoding: CR and LF are quoted. + * + * Revision 1.2 2000/03/03 01:08:29 gerd + * Added Netencoding.Html functions. + * + * Revision 1.1 2000/03/02 01:14:48 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/netencoding.mli b/helm/DEVEL/pxp/netstring/netencoding.mli new file mode 100644 index 000000000..6466572b3 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/netencoding.mli @@ -0,0 +1,271 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(**********************************************************************) +(* Several encodings important for the net *) +(**********************************************************************) + + +(**********************************************************************) +(* Base 64 encoding *) +(**********************************************************************) + +(* See RFC 2045 for a description of Base 64 encoding. *) + +(* THREAD-SAFETY: + * All Base64 functions are reentrant and thus thread-safe. + *) + +module Base64 : sig + + val encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool -> + string -> string + (* Compute the "base 64" encoding of the given string argument. + * Note that the result is a string that only contains the characters + * a-z, A-Z, 0-9, +, /, =, and optionally spaces, CR and LF characters. + * + * If pos and/or len are passed, only the substring starting at + * pos (default: 0) with length len (default: rest of the string) + * is encoded. + * + * The result is divided up into lines not longer than 'linelength' + * (without counting the line separator); default: do not divide lines. + * If 'linelength' is smaller than 4, no line division is performed. + * If 'linelength' is not divisible by 4, the produced lines are a + * bit shorter than 'linelength'. + * + * If 'crlf' (default: false) the lines are ended by CRLF; otherwise + * they are only ended by LF. + * (You need the crlf option to produce correct MIME messages.) 
+ * + *) + + val url_encode : ?pos:int -> ?len:int -> ?linelength:int -> ?crlf:bool -> + string -> string + (* Same as 'encode' but use slightly different characters that can be + * part of URLs without additional encodings. + * The encoded string consists only of the characters a-z, A-Z, 0-9, + * -, /, . + * 'url_encode' does NOT implement the Base 64 encoding as described + * in the standard! + *) + + val encode_substring : string -> pos:int -> len:int -> linelength:int -> + crlf:bool -> string + (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** + * + * encode_substring s pos len linelen crlf: + * Encodes the substring at position 'pos' in 's' with length 'len'. + * The result is divided up into lines not longer than 'linelen' (without + * counting the line separator). + * If 'linelen' is smaller than 4, no line division is performed. + * If 'linelen' is not divisible by 4, the produced lines are a + * bit shorter than 'linelen'. + * If 'crlf' the lines are ended by CRLF; otherwise they are only + * ended by LF. + * (You need the crlf option to produce correct MIME messages.) + *) + + val decode : ?pos:int -> ?len:int -> ?url_variant:bool -> + ?accept_spaces:bool -> string -> string + (* Decodes the given string argument. + * + * If pos and/or len are passed, only the substring starting at + * pos (default: 0) with length len (default: rest of the string) + * is decoded. + * + * If url_variant (default: true) is set, the functions also + * accepts the characters '-' and '.' as produced by 'url_encode'. + * + * If accept_spaces (default: false) is set, the function ignores + * white space contained in the string to decode (otherwise the + * function fails if it finds white space). + *) + + val decode_ignore_spaces : string -> string + (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** + * + * Decodes the string, too, but it is allowed that the string contains + * whitespace characters. + * This function is slower than 'decode'. 
+ *) + + val decode_substring : string -> pos:int -> len:int -> url_variant:bool -> + accept_spaces:bool -> string + (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** + * + * decode_substring s pos len url spaces: + * Decodes the substring of 's' beginning at 'pos' with length 'len'. + * If 'url', strings created by 'url_encode' are accepted, too. + * If 'spaces', whitespace characters are allowed in the string. + *) +end + +(**********************************************************************) +(* Quoted printable encoding *) +(**********************************************************************) + +(* See RFC 2045. + * This implementation assumes that the encoded string has a text MIME + * type. Because of this, the characters CR and LF are never protected + * by hex tokens; they are copied literally to the output string. + *) + +(* THREAD-SAFETY: + * All QuotedPrintable functions are reentrant and thus thread-safe. + *) + +module QuotedPrintable : + sig + val encode : ?pos:int -> ?len:int -> string -> string + (* Encodes the string and returns it. + * Note line breaks: + * No additional soft line breaks are added. The characters CR + * and LF are not represented as =0D resp. =0A. (But other control + * characters ARE encoded.) + * Note unsafe characters: + * As recommended by RFC 2045, the characters !\"#$@[]^`{|}~ + * are additionally represented as hex tokens. -- " + * + * If pos and/or len are passed, only the substring starting at + * pos (default: 0) with length len (default: rest of the string) + * is encoded. + *) + + val encode_substring : string -> pos:int -> len:int -> string + (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** + * encode_substring s pos len: + * Encodes the substring of 's' beginning at 'pos' with length 'len'. + *) + + val decode : ?pos:int -> ?len:int -> string -> string + (* Decodes the string and returns it. + * Most format errors cause an Invalid_argument exception. 
+ * Note that soft line breaks can be properly decoded although + * 'encode' will never produce them. + * + * If pos and/or len are passed, only the substring starting at + * pos (default: 0) with length len (default: rest of the string) + * is decoded. + *) + + val decode_substring : string -> pos:int -> len:int -> string + (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** + * decode_substring s pos len: + * Decodes the substring of 's' beginning at 'pos' with length 'len'. + *) + + end + +(**********************************************************************) +(* Q encoding *) +(**********************************************************************) + +(* See RFC 2047. + * The functions behave similar to those of QuotedPrintable. + *) + +(* THREAD-SAFETY: + * All Q functions are reentrant and thus thread-safe. + *) + +module Q : + sig + val encode : ?pos:int -> ?len:int -> string -> string + (* Note: + * All characters except alphanumeric characters are protected by + * hex tokens. + * In particular, spaces are represented as "=20", not as "_". + *) + + val decode : ?pos:int -> ?len:int -> string -> string + + val encode_substring : string -> pos:int -> len:int -> string + (* *** DEPRECATED FUNCTION *** Use 'encode' instead! *** *) + + val decode_substring : string -> pos:int -> len:int -> string + (* *** DEPRECATED FUNCTION *** Use 'decode' instead! *** *) + end + +(**********************************************************************) +(* B encoding *) +(**********************************************************************) + +(* The B encoding of RFC 2047 is the same as Base64. *) + + +(**********************************************************************) +(* URL-encoding *) +(**********************************************************************) + +(* Encoding/Decoding within URLs: + * + * The following two functions perform the '%'-substitution for + * characters that may otherwise be interpreted as metacharacters. 
+ * + * According to: RFC 1738, RFC 1630 + *) + +(* THREAD-SAFETY: + * The Url functions are thread-safe. + *) + +module Url : + sig + val decode : string -> string + val encode : string -> string + end + + +(**********************************************************************) +(* HTMLization *) +(**********************************************************************) + +(* Encodes characters that need protection by converting them to + * entity references. E.g. "<" is converted to "&lt;". + * As the entities may be named, there is a dependency on the character + * set. Currently, there are only functions for the Latin 1 alphabet. + *) + +(* THREAD-SAFETY: + * The Html functions are thread-safe. + *) + +module Html : + sig + val encode_from_latin1 : string -> string + (* Encodes the characters 0-8, 11-12, 14-31, '<', '>', '"', '&', + * 127-255. If the characters have a name, a named entity is + * preferred over a numeric entity. + *) + val decode_to_latin1 : string -> string + (* Decodes the string. Unknown named entities are left as they + * are (i.e. decode_to_latin1 "&nonsense;" = "&nonsense;"). + * The same applies to numeric entities greater than 255. + *) + end + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:27 lpadovan + * Initial revision + * + * Revision 1.4 2000/06/25 22:34:43 gerd + * Added labels to arguments. + * + * Revision 1.3 2000/06/25 21:15:48 gerd + * Checked thread-safety. + * + * Revision 1.2 2000/03/03 01:08:29 gerd + * Added Netencoding.Html functions. + * + * Revision 1.1 2000/03/02 01:14:48 gerd + * Initial revision.
+ * + * + *) diff --git a/helm/DEVEL/pxp/netstring/nethtml.ml b/helm/DEVEL/pxp/netstring/nethtml.ml new file mode 100644 index 000000000..7f9d983cd --- /dev/null +++ b/helm/DEVEL/pxp/netstring/nethtml.ml @@ -0,0 +1,276 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Nethtml_scanner;; + +type document = + Element of (string * (string*string) list * document list) + | Data of string +;; + + +exception End_of_scan;; + + +let no_end_tag = (* empty HTML elements *) + ref + [ "isindex"; + "base"; + "meta"; + "link"; + "hr"; + "input"; + "img"; + "param"; + "basefont"; + "br"; + "area"; + ] +;; + + +let special_tag = (* other lexical rules *) + ref + [ "script"; + "style"; + ] +;; + + +let rec parse_comment buf = + let t = scan_comment buf in + match t with + Mcomment -> + parse_comment buf + | Eof -> + raise End_of_scan + | _ -> + () +;; + + +let rec parse_doctype buf = + let t = scan_doctype buf in + match t with + Mdoctype -> + parse_doctype buf + | Eof -> + raise End_of_scan + | _ -> + () +;; + + +let parse_document buf = + let current_name = ref "" in + let current_atts = ref [] in + let current_subs = ref [] in + let stack = Stack.create() in + + let parse_atts() = + let rec next_no_space() = + match scan_element buf with + Space _ -> next_no_space() + | t -> t + in + + let rec parse_atts_lookahead next = + match next with + Relement -> [] + | Name n -> + begin match next_no_space() with + Is -> + begin match next_no_space() with + Name v -> + (String.lowercase n, String.uppercase v) :: + parse_atts_lookahead (next_no_space()) + | Literal v -> + (String.lowercase n,v) :: + parse_atts_lookahead (next_no_space()) + | Eof -> + raise End_of_scan + | Relement -> + (* Illegal *) + [] + | _ -> + (* Illegal *) + parse_atts_lookahead (next_no_space()) + end + | Eof -> + raise End_of_scan + | Relement -> + (* <==> *) + [ String.lowercase n, String.lowercase n ] + | next' -> + (* assume <==> *) + ( String.lowercase n, 
String.lowercase n ) :: + parse_atts_lookahead next' + end + | Eof -> + raise End_of_scan + | _ -> + (* Illegal *) + parse_atts_lookahead (next_no_space()) + in + parse_atts_lookahead (next_no_space()) + in + + let rec parse_special name = + (* Parse until *) + match scan_special buf with + Lelementend n -> + if n = name then + "" + else + " + raise End_of_scan + | Cdata s -> + s ^ parse_special name + | _ -> + (* Illegal *) + parse_special name + in + + let rec skip_element() = + (* Skip until ">" *) + match scan_element buf with + Relement -> + () + | Eof -> + raise End_of_scan + | _ -> + skip_element() + in + + let rec parse_next() = + let t = scan_document buf in + match t with + Lcomment -> + parse_comment buf; + parse_next() + | Ldoctype -> + parse_doctype buf; + parse_next() + | Lelement name -> + let name = String.lowercase name in + if List.mem name !no_end_tag then begin + let atts = parse_atts() in + current_subs := (Element(name, atts, [])) :: !current_subs; + parse_next() + end + else if List.mem name !special_tag then begin + let atts = parse_atts() in + let data = parse_special name in + (* Read until ">" *) + skip_element(); + current_subs := (Element(name, atts, [Data data])) :: !current_subs; + parse_next() + end + else begin + let atts = parse_atts() in + Stack.push (!current_name, !current_atts, !current_subs) stack; + current_name := name; + current_atts := atts; + current_subs := []; + parse_next() + end + | Cdata data -> + current_subs := (Data data) :: !current_subs; + parse_next() + | Lelementend name -> + let name = String.lowercase name in + (* Read until ">" *) + skip_element(); + (* Search the element to close on the stack: *) + let found = ref (name = !current_name) in + Stack.iter + (fun (old_name, _, _) -> + if name = old_name then found := true) + stack; + (* If not found, the end tag is wrong. Simply ignore it. 
*) + if not !found then + parse_next() + else begin + (* Put the current element on to the stack: *) + Stack.push (!current_name, !current_atts, !current_subs) stack; + (* If found: Remove the elements from the stack, and append + * them to the previous element as sub elements + *) + let rec remove() = + let old_name, old_atts, old_subs = Stack.pop stack in + (* or raise Stack.Empty *) + if old_name = name then + old_name, old_atts, old_subs + else + let older_name, older_atts, older_subs = remove() in + older_name, + older_atts, + (Element (old_name, old_atts, List.rev old_subs) :: older_subs) + in + let old_name, old_atts, old_subs = remove() in + (* Remove one more element: the element containing the element + * currently being closed. + *) + let new_name, new_atts, new_subs = Stack.pop stack in + current_name := new_name; + current_atts := new_atts; + current_subs := (Element (old_name, old_atts, List.rev old_subs)) + :: new_subs; + (* Go on *) + parse_next() + end + | Eof -> + raise End_of_scan + | _ -> + parse_next() + in + try + parse_next(); + List.rev !current_subs + with + End_of_scan -> + (* Close all remaining elements: *) + Stack.push (!current_name, !current_atts, !current_subs) stack; + let rec remove() = + let old_name, old_atts, old_subs = Stack.pop stack in + (* or raise Stack.Empty *) + try + let older_name, older_atts, older_subs = remove() in + older_name, + older_atts, + (Element (old_name, old_atts, List.rev old_subs) :: older_subs) + with + Stack.Empty -> + old_name, old_atts, old_subs + in + let name, atts, subs = remove() in + List.rev subs +;; + + +let parse_string s = + let buf = Lexing.from_string s in + parse_document buf +;; + + +let parse_file fd = + let buf = Lexing.from_channel fd in + parse_document buf +;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:28 lpadovan + * Initial revision + * + * Revision 1.1 2000/03/03 01:07:25 gerd + * 
Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/netstring/nethtml.mli b/helm/DEVEL/pxp/netstring/nethtml.mli new file mode 100644 index 000000000..d7af381cc --- /dev/null +++ b/helm/DEVEL/pxp/netstring/nethtml.mli @@ -0,0 +1,72 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +(* The type 'document' represents parsed HTML documents. + * Element (name, args, subnodes): is an element node for an element of + * type 'name' (i.e. written <name ...>...</name>) with arguments 'args' + * and subnodes 'subnodes' (the material within the element). The arguments + * are simply name/value pairs. Entity references (something like &xy;) + * occurring in the values are NOT resolved. + * Arguments without values (e.g. \n"; +print_string "\n"; +print_string "\n"; + +print_string "

POST URL-encoded form

\n"; +print_string "
\n"; +print_string "\n"; +print_string "\n"; +print_string "
\n"; + +print_string "

POST FORM-encoded form

\n"; +print_string "
\n"; +print_string "\n"; +print_string "\n"; +print_string "\n"; +print_string "
\n"; + +print_string "

File upload

\n"; +print_string "
\n"; +print_string "\n"; +print_string "\n"; +print_string "\n"; +print_string "
\n"; + + + +print_string "\n"; + +flush stdout +;; + + diff --git a/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml b/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml new file mode 100644 index 000000000..db5eac930 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tests/test_mimestring.ml @@ -0,0 +1,589 @@ +#require "str";; +#directory "..";; +#load "netstring.cma";; + +open Mimestring;; + +(**********************************************************************) +(* scan_structured_value *) +(**********************************************************************) + +let t001() = + let r = scan_structured_value "user@domain.com" [ '@'; '.' ] [] in + r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] +;; + + +let t002() = + let r = scan_structured_value "user @ domain . com" [ '@'; '.' ] [] in + r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] +;; + + +let t003() = + let r = scan_structured_value "user(Do you know him?)@domain.com" [ '@'; '.' ] + [] in + r = [ Atom "user"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] +;; + + +let t004() = + let r = scan_structured_value "user @ domain . com" [ '@'; '.'; ' ' ] [] in + r = [ Atom "user"; Special ' '; Special '@'; Special ' '; Atom "domain"; + Special ' '; Special '.'; Special ' '; Atom "com" ] +;; + + +let t005() = + let r = scan_structured_value "user(Do you know him?)@domain.com" + ['@'; '.'; '('] [] in + r = [ Atom "user"; Special '('; Atom "Do"; Atom "you"; Atom "know"; + Atom "him?)"; Special '@'; Atom "domain"; Special '.'; Atom "com" ] +;; + + +let t006() = + let r = scan_structured_value "\"My.name\"@domain.com" [ '@'; '.' ] [] in + r = [ QString "My.name"; Special '@'; Atom "domain"; Special '.'; + Atom "com" ] +;; + + +let t007() = + let r = scan_structured_value "\"\\\"()@. \"@domain.com" [ '@'; '.' ] [] in + r = [ QString "\"()@. 
"; Special '@'; Atom "domain"; Special '.'; + Atom "com" ] +;; + + +let t008() = + let r = scan_structured_value "a(b(c(d)e)f)g" [] [] in + r = [ Atom "a"; Atom "g" ] +;; + + +let t009() = + let r = scan_structured_value "a(b(c(d)e)f" [] [] in + r = [ Atom "a" ] +;; + + +let t010() = + let r = scan_structured_value "a(b\\(c\\(d\\)e)f" [] [] in + r = [ Atom "a"; Atom "f" ] +;; + + +let t011() = + let r = scan_structured_value "a(b(c(d)e)f\\" [] [] in + r = [ Atom "a" ] +;; + + +let t012() = + let r = scan_structured_value "\"abc" [] [] in + r = [ QString "abc" ] +;; + + +let t013() = + let r = scan_structured_value "\"abc\\" [] [] in + r = [ QString "abc\\" ] +;; + + +(* New tests for netstring-0.9: *) + +let t020() = + let r = scan_structured_value "user(Do you know him?)@domain.com" + [ '@'; '.' ] [ Return_comments ] in + r = [ Atom "user"; Comment; Special '@'; Atom "domain"; Special '.'; + Atom "com" ] +;; + +let t021() = + let r = scan_structured_value "user (Do you know him?) @ domain . com" + [ '@'; '.'; ' ' ] [] in + r = [ Atom "user"; Special ' '; Special ' '; Special ' '; Special '@'; + Special ' '; Atom "domain"; + Special ' '; Special '.'; Special ' '; Atom "com" ] +;; + +let t022() = + let r = scan_structured_value "user (Do you know him?) @ domain . 
com" + [ '@'; '.'; ' ' ] [ Return_comments ] in + r = [ Atom "user"; Special ' '; Comment; Special ' '; Special '@'; + Special ' '; Atom "domain"; + Special ' '; Special '.'; Special ' '; Atom "com" ] +;; + +let t023() = + let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" + [] [] in + r = [ Atom "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" ] +;; + +let t024() = + let r = scan_structured_value "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" + [ ] [ Recognize_encoded_words ] in + r = [ EncodedWord("ISO-8859-1", "Q", "Keld_J=F8rn_Simonsen") ] +;; + +let t025() = + let r = scan_structured_value + "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" + [] + [ Recognize_encoded_words ] in + r = [ EncodedWord + ("ISO-8859-1", "B", "SWYgeW91IGNhbiByZWFkIHRoaXMgeW8="); + EncodedWord + ("ISO-8859-2", "B", "dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==") + ] +;; + +(**********************************************************************) +(* s_extended_token *) +(**********************************************************************) + +let scan specials options str = + let scn = create_mime_scanner specials options str in + scan_token_list scn;; + +let t100() = + let r = scan [] [] "Two atoms" in + match r with + [ a1, Atom "Two"; a2, Atom "atoms" ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 3) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 4) && + (get_line a2 = 1) && + (get_column a2 = 4) && + (get_length a2 = 5) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + + +let t101() = + let r = scan [] [] " Two atoms " in + match r with + [ a1, Atom "Two"; a2, Atom "atoms" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 3) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 7) && + (get_line a2 = 1) && + (get_column a2 = 7) && + (get_length a2 = 5) && + 
(separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + + +let t102() = + let r = scan [] [] " Two\n atoms " in + match r with + [ a1, Atom "Two"; a2, Atom "atoms" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 3) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 7) && + (get_line a2 = 2) && + (get_column a2 = 1) && + (get_length a2 = 5) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t110() = + let r = scan [] [] "\"Two\" \"qstrings\"" in + match r with + [ a1, QString "Two"; a2, QString "qstrings" ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 5) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 6) && + (get_line a2 = 1) && + (get_column a2 = 6) && + (get_length a2 = 10) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t111() = + let r = scan [] [] " \"Two\" \"qstrings\" " in + match r with + [ a1, QString "Two"; a2, QString "qstrings" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 5) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 9) && + (get_line a2 = 1) && + (get_column a2 = 9) && + (get_length a2 = 10) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t112() = + let r = scan [] [] " \"Two\nlines\" \"and\nqstrings\" " in + match r with + [ a1, QString "Two\nlines"; a2, QString "and\nqstrings" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 11) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 15) && + (get_line a2 = 2) && + (get_column a2 = 8) && + (get_length a2 = 14) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t113() = + let r = scan [] [] " \"Two\\\nlines\" \"and\\\nqstrings\" " in + match r with + [ a1, QString "Two\nlines"; a2, 
QString "and\nqstrings" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 12) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 16) && + (get_line a2 = 2) && + (get_column a2 = 8) && + (get_length a2 = 15) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t120() = + (* Domain literals are implemented like quoted strings, so only the + * most complicated test case. + *) + let r = scan [] [] " [Two\\\nlines] [and\\\nliterals] " in + match r with + [ a1, DomainLiteral "Two\nlines"; a2, DomainLiteral "and\nliterals" ] -> + + (get_pos a1 = 2) && + (get_line a1 = 1) && + (get_column a1 = 2) && + (get_length a1 = 12) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 16) && + (get_line a2 = 2) && + (get_column a2 = 8) && + (get_length a2 = 15) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t130() = + let r = scan [] [ Return_comments ] "(Two) (comments)" in + match r with + [ a1, Comment; a2, Comment ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 5) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 6) && + (get_line a2 = 1) && + (get_column a2 = 6) && + (get_length a2 = 10) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t131() = + let r = scan [] [ Return_comments ] "(Two\nlines) (and\ncomments)" in + match r with + [ a1, Comment; a2, Comment ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 11) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 12) && + (get_line a2 = 2) && + (get_column a2 = 7) && + (get_length a2 = 14) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t132() = + let r = scan [] [ Return_comments ] "(Two\\\nlines) (and\\\ncomments)" in + match r with + [ a1, Comment; a2, Comment ] -> + + (get_pos a1 = 
0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 12) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 13) && + (get_line a2 = 2) && + (get_column a2 = 7) && + (get_length a2 = 15) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t133() = + let r = scan [] [ Return_comments ] "(a\n(b\nc)d\ne(f)) atom" in + match r with + [ a1, Comment; a2, Atom "atom" ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 15) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 16) && + (get_line a2 = 4) && + (get_column a2 = 6) && + (get_length a2 = 4) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t140() = + let r = scan [] [] "\031\031" in + match r with + [ a1, Control '\031'; a2, Control '\031' ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 1) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 1) && + (get_line a2 = 1) && + (get_column a2 = 1) && + (get_length a2 = 1) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t150() = + let r = scan [ '\t'; '\n' ] [] " \t\n \n \t" in + match r with + [ a1, Special '\t'; _, Special '\n'; _, Special '\n'; a2, Special '\t'] -> + + (get_pos a1 = 1) && + (get_line a1 = 1) && + (get_column a1 = 1) && + (get_length a1 = 1) && + (separates_adjacent_encoded_words a1 = false) && + + (get_pos a2 = 8) && + (get_line a2 = 3) && + (get_column a2 = 2) && + (get_length a2 = 1) && + (separates_adjacent_encoded_words a2 = false) + + | _ -> + false +;; + +let t160() = + let r = scan [] [ Recognize_encoded_words ] + "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in + match r with + [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); + a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 22) && + 
(separates_adjacent_encoded_words a1 = false) && + (get_decoded_word a1 = "Görd") && + (get_charset a1 = "ISO8859-1") && + + (get_pos a2 = 23) && + (get_line a2 = 1) && + (get_column a2 = 23) && + (get_length a2 = 22) && + (separates_adjacent_encoded_words a2 = false) && + (get_decoded_word a2 = "Görd") && + (get_charset a2 = "ISO8859-1") + + | _ -> + false +;; + +let t161() = + let r = scan [ ' ' ] [ Recognize_encoded_words ] + "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in + match r with + [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); + sp, Special ' '; + a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 22) && + (separates_adjacent_encoded_words a1 = false) && + (get_decoded_word a1 = "Görd") && + (get_charset a1 = "ISO8859-1") && + + (get_pos a2 = 23) && + (get_line a2 = 1) && + (get_column a2 = 23) && + (get_length a2 = 22) && + (separates_adjacent_encoded_words a2 = false) && + (get_decoded_word a2 = "Görd") && + (get_charset a2 = "ISO8859-1") && + + (separates_adjacent_encoded_words sp = true) + + | _ -> + false +;; + +let t162() = + let r = scan [ ' ' ] [ Recognize_encoded_words ] + "=?iso8859-1?q?G=F6rd?= =?iso8859-1?q?G=F6rd?=" in + match r with + [ a1, EncodedWord("ISO8859-1", "Q", "G=F6rd"); + sp1, Special ' '; + sp2, Special ' '; + a2, EncodedWord("ISO8859-1", "Q", "G=F6rd"); ] -> + + (get_pos a1 = 0) && + (get_line a1 = 1) && + (get_column a1 = 0) && + (get_length a1 = 22) && + (separates_adjacent_encoded_words a1 = false) && + (get_decoded_word a1 = "Görd") && + (get_charset a1 = "ISO8859-1") && + + (get_pos a2 = 24) && + (get_line a2 = 1) && + (get_column a2 = 24) && + (get_length a2 = 22) && + (separates_adjacent_encoded_words a2 = false) && + (get_decoded_word a2 = "Görd") && + (get_charset a2 = "ISO8859-1") && + + (separates_adjacent_encoded_words sp1 = true) && + (separates_adjacent_encoded_words sp2 = true) + + | _ -> + false +;; + + + 
+(**********************************************************************) + +let test f n = + if f() then + print_endline ("Test " ^ n ^ " ok") + else + print_endline ("Test " ^ n ^ " FAILED!!!!"); + flush stdout +;; + +test t001 "001";; +test t002 "002";; +test t003 "003";; +test t004 "004";; +test t005 "005";; +test t006 "006";; +test t007 "007";; +test t008 "008";; +test t009 "009";; +test t010 "010";; +test t011 "011";; +test t012 "012";; +test t013 "013";; + +test t020 "020";; +test t021 "021";; +test t022 "022";; +test t023 "023";; +test t024 "024";; +test t025 "025";; + +test t100 "100";; +test t101 "101";; +test t102 "102";; +test t110 "110";; +test t111 "111";; +test t112 "112";; +test t113 "113";; +test t120 "120";; +test t130 "130";; +test t131 "131";; +test t132 "132";; +test t133 "133";; +test t140 "140";; +test t150 "150";; +test t160 "160";; +test t161 "161";; +test t162 "162";; diff --git a/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml b/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml new file mode 100644 index 000000000..29673fa5f --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tests/test_netencoding.ml @@ -0,0 +1,223 @@ +#require "str";; +#directory "..";; +#load "netstring.cma";; + + +open Netencoding;; + +(**********************************************************************) +(* Base64 *) +(**********************************************************************) + +(* Test strings: + * "", "a", "ab", "abc", "abcd", "abcde", + * "abcdefghijklmnopqrstuvwxyz". + *) + +let t001() = + (* ENCODE. No line breaks. *) + Base64.encode "" = "" & + Base64.encode "a" = "YQ==" & + Base64.encode "ab" = "YWI=" & + Base64.encode "abc" = "YWJj" & + Base64.encode "abcd" = "YWJjZA==" & + Base64.encode "abcde" = "YWJjZGU=" & + Base64.encode "abcdefghijklmnopqrstuvwxyz" = + "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" +;; + + +let t002() = + (* ENCODE. 
Lines with length of 4, separated by LF *) + let abc = "abcdefghijklmnopqrstuvwxyz" in + Base64.encode_substring abc 0 0 4 false = "" & + Base64.encode_substring abc 0 1 4 false = "YQ==\n" & + Base64.encode_substring abc 0 2 4 false = "YWI=\n" & + Base64.encode_substring abc 0 3 4 false = "YWJj\n" & + Base64.encode_substring abc 0 4 4 false = "YWJj\nZA==\n" & + Base64.encode_substring abc 0 5 4 false = "YWJj\nZGU=\n" & + Base64.encode_substring abc 0 26 4 false = + "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n" +;; + + +let t003() = + (* ENCODE. Lines with length of 5, separated by LF *) + let abc = "abcdefghijklmnopqrstuvwxyz" in + Base64.encode_substring abc 0 0 5 false = "" & + Base64.encode_substring abc 0 1 5 false = "YQ==\n" & + Base64.encode_substring abc 0 2 5 false = "YWI=\n" & + Base64.encode_substring abc 0 3 5 false = "YWJj\n" & + Base64.encode_substring abc 0 4 5 false = "YWJj\nZA==\n" & + Base64.encode_substring abc 0 5 5 false = "YWJj\nZGU=\n" & + Base64.encode_substring abc 0 26 5 false = + "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n" +;; + + +let t004() = + (* ENCODE. Lines with length of 7, separated by LF *) + let abc = "abcdefghijklmnopqrstuvwxyz" in + Base64.encode_substring abc 0 0 7 false = "" & + Base64.encode_substring abc 0 1 7 false = "YQ==\n" & + Base64.encode_substring abc 0 2 7 false = "YWI=\n" & + Base64.encode_substring abc 0 3 7 false = "YWJj\n" & + Base64.encode_substring abc 0 4 7 false = "YWJj\nZA==\n" & + Base64.encode_substring abc 0 5 7 false = "YWJj\nZGU=\n" & + Base64.encode_substring abc 0 26 7 false = + "YWJj\nZGVm\nZ2hp\namts\nbW5v\ncHFy\nc3R1\ndnd4\neXo=\n" +;; + + +let t005() = + (* ENCODE. 
Lines with length of 8, separated by LF *) + let abc = "abcdefghijklmnopqrstuvwxyz" in + Base64.encode_substring abc 0 0 8 false = "" & + Base64.encode_substring abc 0 1 8 false = "YQ==\n" & + Base64.encode_substring abc 0 2 8 false = "YWI=\n" & + Base64.encode_substring abc 0 3 8 false = "YWJj\n" & + Base64.encode_substring abc 0 4 8 false = "YWJjZA==\n" & + Base64.encode_substring abc 0 5 8 false = "YWJjZGU=\n" & + Base64.encode_substring abc 0 26 8 false = + "YWJjZGVm\nZ2hpamts\nbW5vcHFy\nc3R1dnd4\neXo=\n" +;; + + +let t006() = + (* ENCODE. Lines with length of 8, separated by CRLF *) + let abc = "abcdefghijklmnopqrstuvwxyz" in + Base64.encode_substring abc 0 0 8 true = "" & + Base64.encode_substring abc 0 1 8 true = "YQ==\r\n" & + Base64.encode_substring abc 0 2 8 true = "YWI=\r\n" & + Base64.encode_substring abc 0 3 8 true = "YWJj\r\n" & + Base64.encode_substring abc 0 4 8 true = "YWJjZA==\r\n" & + Base64.encode_substring abc 0 5 8 true = "YWJjZGU=\r\n" & + Base64.encode_substring abc 0 26 8 true = + "YWJjZGVm\r\nZ2hpamts\r\nbW5vcHFy\r\nc3R1dnd4\r\neXo=\r\n" +;; + + +let t020() = + (* DECODE. First test without spaces *) + Base64.decode_substring "" 0 0 false false = "" & + Base64.decode_substring "YQ==" 0 4 false false = "a" & + Base64.decode_substring "YWI=" 0 4 false false = "ab" & + Base64.decode_substring "YWJj" 0 4 false false = "abc" & + Base64.decode_substring "YWJjZA==" 0 8 false false = "abcd" & + Base64.decode_substring "YWJjZGU=" 0 8 false false = "abcde" & + Base64.decode_substring + "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=" 0 36 false false = + "abcdefghijklmnopqrstuvwxyz" +;; + + +let t021() = + (* DECODE. With spaces *) + Base64.decode_substring " \r\n\t" 0 4 false true = "" & + Base64.decode_substring " Y W J j\n Z G U = " 0 18 false true = "abcde" +;; + + +let t022() = + (* DECODE. With URL characters and spaces *) + Base64.decode_substring " Y W J j\n Z G U = " 0 18 true true = "abcde" & + Base64.decode_substring " Y W J j\n Z G U . 
" 0 18 true true = "abcde" +;; + +(**********************************************************************) +(* Quoted Printable *) +(**********************************************************************) + +let t100() = + (* ENCODE. *) + QuotedPrintable.encode "a %= 12345 &$[]\"" = "a %=3D 12345 &=24=5B=5D=22" & + QuotedPrintable.encode "\000\001\002" = "=00=01=02" & + QuotedPrintable.encode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" & + QuotedPrintable.encode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" & + QuotedPrintable.encode "abc \r\n def\nghi " = "abc=20\r\n def\nghi=20" +;; + + +let t120() = + (* DECODE. *) + QuotedPrintable.decode "a %=3D 12345 &=24=5B=5D=22" = "a %= 12345 &$[]\"" & + QuotedPrintable.decode "=00=01=02" = "\000\001\002" & + QuotedPrintable.decode "abc\r\ndef\nghi" = "abc\r\ndef\nghi" & + QuotedPrintable.decode " abc\r\n def\n ghi" = " abc\r\n def\n ghi" & + QuotedPrintable.decode "abc=20\r\n def\nghi=20" = "abc \r\n def\nghi " & + QuotedPrintable.decode "abc=\r\n def\nghi=20" = "abc def\nghi " +;; + +(**********************************************************************) +(* Q *) +(**********************************************************************) + +let t200() = + (* ENCODE. *) + Q.encode "a %= 12345 &$[]\"" = "a=20=25=3D=2012345=20=26=24=5B=5D=22" & + Q.encode "\000\001\002\r\n" = "=00=01=02=0D=0A" +;; + + +let t220() = + (* DECODE. 
*) + Q.decode "a=20=25=3D=2012345=20=26=24=5B=5D=22" = "a %= 12345 &$[]\"" & + Q.decode "=00=01=02=0D=0A" = "\000\001\002\r\n" & + Q.decode "a=20=25=3d=2012345=20=26=24=5b=5d=22" = "a %= 12345 &$[]\"" +;; + +(**********************************************************************) +(* Url *) +(**********************************************************************) + +(* Already tested for Cgi *) + +(**********************************************************************) +(* Html *) +(**********************************************************************) + +let t300() = + Html.encode_from_latin1 "<>&\"abcdefäöÜ\160\025'" = + "<>&"abcdefäöÜ '" +;; + + +let t320() = + Html.decode_to_latin1 + "<>&"abcdefäöÜ " = + "<>&\"abcdefäöÜ\160\025" & + Html.decode_to_latin1 "'" = "'" & + Html.decode_to_latin1 "&nonsense;" = "&nonsense;" & + Html.decode_to_latin1 "Ā" = "Ā" +;; + + +(**********************************************************************) + +let test f n = + if f() then + print_endline ("Test " ^ n ^ " ok") + else + print_endline ("Test " ^ n ^ " FAILED!!!!"); + flush stdout +;; + +test t001 "001"; +test t002 "002"; +test t003 "003"; +test t004 "004"; +test t005 "005"; +test t006 "006"; + +test t020 "020"; +test t021 "021"; +test t022 "022"; + +test t100 "100"; +test t120 "120"; + +test t200 "200"; +test t220 "220"; + +test t300 "300"; +test t320 "320"; diff --git a/helm/DEVEL/pxp/netstring/tests/test_neturl.ml b/helm/DEVEL/pxp/netstring/tests/test_neturl.ml new file mode 100644 index 000000000..633bfda09 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tests/test_neturl.ml @@ -0,0 +1,969 @@ +#directory "..";; +#load "netstring.cma";; + +open Neturl;; + + +let expect_malformed_url f = + try ignore(f()); false with Malformed_URL -> true;; + +let works f = + not (expect_malformed_url f) +;; + +(**********************************************************************) +(* extract_url_scheme *) +(**********************************************************************) + +let 
t001 () = + extract_url_scheme "a:bc" = "a" && + extract_url_scheme "A:bc" = "a" && + extract_url_scheme "a:b:c" = "a" && + extract_url_scheme "a+b-c:d:e" = "a+b-c" +;; + + +let t002 () = + let test s = + try ignore(extract_url_scheme s); false with Malformed_URL -> true + in + test "a" && + test "a/b:c" && + test "%61:b" && + test "a%3ab" +;; + +(**********************************************************************) +(* url_syntax *) +(**********************************************************************) + +let hashtbl_for_all f h = + let b = ref true in + Hashtbl.iter + (fun k v -> b := !b && f k v) + h; + !b +;; + +let t010 () = + url_syntax_is_valid null_url_syntax && + url_syntax_is_valid ip_url_syntax && + hashtbl_for_all + (fun _ syn -> + url_syntax_is_valid syn + ) + common_url_syntax +;; + +let t011 () = + url_syntax_is_valid (partial_url_syntax null_url_syntax) && + url_syntax_is_valid (partial_url_syntax ip_url_syntax) && + hashtbl_for_all + (fun _ syn -> + url_syntax_is_valid (partial_url_syntax syn) + ) + common_url_syntax +;; + +let t012 () = + let f = fun _ -> true in + let syn = + { url_enable_scheme = Url_part_not_recognized; + url_enable_user = Url_part_required; + url_enable_password = Url_part_allowed; + url_enable_host = Url_part_required; + url_enable_port = Url_part_not_recognized; + url_enable_path = Url_part_required; + url_enable_param = Url_part_not_recognized; + url_enable_query = Url_part_not_recognized; + url_enable_fragment = Url_part_required; + url_enable_other = Url_part_not_recognized; + url_accepts_8bits = false; + url_is_valid = f; + } in + let syn' = partial_url_syntax syn in + + (syn'.url_enable_scheme = Url_part_not_recognized) && + (syn'.url_enable_user = Url_part_allowed) && + (syn'.url_enable_password = Url_part_allowed) && + (syn'.url_enable_host = Url_part_allowed) && + (syn'.url_enable_port = Url_part_not_recognized) && + (syn'.url_enable_path = Url_part_allowed) && + (syn'.url_enable_param = Url_part_not_recognized) 
&& + (syn'.url_enable_query = Url_part_not_recognized) && + (syn'.url_enable_fragment = Url_part_allowed) && + (syn'.url_enable_other = Url_part_not_recognized) && + (syn'.url_is_valid == f) && + + url_syntax_is_valid syn && + url_syntax_is_valid syn' +;; + +(**********************************************************************) +(* make_url *) +(**********************************************************************) + +let t020 () = + (* Basic functionality: *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + let u1 = make_url + (* default: not encoded *) + ~scheme:"http" + ~user:"U" + ~password:"%()~$@" + ~host:"a.b.c" + ~port:81 + ~path:["";"?";""] + http_syn in + + url_provides + ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true + u1 && + + not + (url_provides + ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true + ~query:true u1) && + + (url_syntax_of_url u1 == http_syn) && + + (url_scheme u1 = "http") && + (url_user u1 = "U") && + (url_password u1 = "%()~$@") && + (url_host u1 = "a.b.c") && + (url_port u1 = 81) && + (url_path u1 = ["";"?";""]) && + + (url_user ~encoded:true u1 = "U") && + (url_password ~encoded:true u1 = "%25()%7E$%40") && + (url_path ~encoded:true u1 = ["";"%3F";""]) && + + string_of_url u1 = "http://U:%25()%7E$%40@a.b.c:81/%3F/" +;; + + +let t021 () = + (* Basic functionality: *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + let u1 = make_url + ~encoded:true + ~scheme:"http" + ~user:"%55" + ~password:"%25()%7e$%40" + ~host:"a.b.c" + ~port:81 + ~path:["";"%3F";""] + http_syn in + + url_provides + ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true + u1 && + + not + (url_provides + ~scheme:true ~user:true ~password:true ~host:true ~port:true ~path:true + ~query:true u1) && + + (url_syntax_of_url u1 == http_syn) && + + (url_scheme u1 = "http") && + (url_user u1 = "U") && + (url_password u1 = "%()~$@") && + (url_host u1 = "a.b.c") && + (url_port u1 = 81) 
&& + (url_path u1 = ["";"?";""]) && + + (url_user ~encoded:true u1 = "%55") && + (url_password ~encoded:true u1 = "%25()%7e$%40") && + (url_path ~encoded:true u1 = ["";"%3F";""]) && + + string_of_url u1 = "http://%55:%25()%7e$%40@a.b.c:81/%3F/" +;; + + +(* NEGATIVE TESTS *) + +let t030 () = + (* It is not possible to add a component which is not recognized *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + expect_malformed_url + (fun () -> + make_url + ~scheme:"http" + ~user:"U" + ~password:"%()~$@" + ~host:"a.b.c" + ~port:81 + ~path:["";"?";""] + ~fragment:"abc" + http_syn) +;; + + +let t031 () = + (* It is not possible to put malformed '%'-encodings into the URL *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + works (* reference *) + (fun () -> + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"XX" + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn) && + + expect_malformed_url + (fun () -> + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"%XX" + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn) && + + expect_malformed_url + (fun () -> + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"%X" + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn) && + + expect_malformed_url + (fun () -> + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"%" + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn) +;; + +let t032 () = + (* It is not possible to put unsafe characters into the URL *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + let make c = + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:(String.make 1 c) + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn + in + + works (fun () -> make 'a') && (* reference *) + + (* List of unsafe characters taken from RFC1738: *) + expect_malformed_url (fun () -> make '<') && + expect_malformed_url (fun () -> make '>') && + expect_malformed_url (fun () -> make '"') && + 
expect_malformed_url (fun () -> make '#') && + (* Note: '#' would be considered as reserved if fragments were enabled *) + expect_malformed_url (fun () -> make '%') && + expect_malformed_url (fun () -> make '{') && + expect_malformed_url (fun () -> make '}') && + expect_malformed_url (fun () -> make '|') && + expect_malformed_url (fun () -> make '\\') && + expect_malformed_url (fun () -> make '^') && + expect_malformed_url (fun () -> make '[') && + expect_malformed_url (fun () -> make ']') && + expect_malformed_url (fun () -> make '`') && + expect_malformed_url (fun () -> make '~') && + (* Note: '~' is considered as safe in paths: *) + works + (fun () -> + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"a" + ~host:"a.b.c" + ~port:81 + ~path:["";"~";""] + http_syn) +;; + +let t033 () = + (* It is not possible to put reserved characters into the URL *) + let http_syn = Hashtbl.find common_url_syntax "http" in + + let make_password c = + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:(String.make 1 c) + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + http_syn + in + let make_path c = + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"a" + ~host:"a.b.c" + ~port:81 + ~path:["";String.make 1 c;""] + http_syn + in + let make_query c = + make_url + ~encoded:true + ~scheme:"http" + ~user:"U" + ~password:"a" + ~host:"a.b.c" + ~port:81 + ~path:["";"a";""] + ~query:(String.make 1 c) + http_syn + in + + (* Note: There is a difference between RFC 1738 and RFC 1808 regarding + * which characters are reserved. RFC 1808 defines a fixed set of characters + * as reserved while RFC 1738 defines the reserved characters depending + * on the scheme. + * This implementation of URLs follows RFC 1738 (because of practical + * reasons). 
+ *) + + works (fun () -> make_password 'a') && (* reference *) + works (fun () -> make_path 'a') && + works (fun () -> make_query 'a') && + + expect_malformed_url (fun () -> make_password ':') && + expect_malformed_url (fun () -> make_password '@') && + expect_malformed_url (fun () -> make_password '/') && + works (fun () -> make_password ';') && + works (fun () -> make_password '?') && + works (fun () -> make_password '=') && + works (fun () -> make_password '&') && + + (* Note: ';' is allowed in path and query because parameters are not + * recognized in HTTP syntax. + *) + + expect_malformed_url (fun () -> make_path '/') && + expect_malformed_url (fun () -> make_path '?') && + works (fun () -> make_path ':') && + works (fun () -> make_path '@') && + works (fun () -> make_path ';') && + works (fun () -> make_path '=') && + works (fun () -> make_path '&') && + + expect_malformed_url (fun () -> make_query '?') && + works (fun () -> make_query '/') && + works (fun () -> make_query ':') && + works (fun () -> make_query '@') && + works (fun () -> make_query ';') && + works (fun () -> make_query '=') && + works (fun () -> make_query '&') +;; + + +let t034 () = + (* It is not possible to create a URL with a password, but without user; + * and neither to create a URL with a port, but without host; + * and neither to create a URL with a user, but without host + *) + + expect_malformed_url + (fun () -> + make_url + ~scheme:"http" + ~password:"a" + ~host:"a.b.c" + ~path:["";"a";""] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:"http" + ~user:"U" + ~path:["";"a";""] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:"http" + ~port:81 + ~path:["";"a";""] + ip_url_syntax) +;; + + +let t035 () = + (* It is not possible to create a URL with illegal scheme prefix *) + + (* reference: *) + works + (fun () -> + make_url + ~scheme:"a" + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:":" + 
ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:"a=b" + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:"a%62b" + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~scheme:"a&b" + ip_url_syntax) +;; + + +let t036 () = + (* It is not possible to have a path with double slashes *) + + (* reference: *) + works + (fun () -> + make_url + ~path:["";"a";""] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~path:["";""] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~path:["a";"";""] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~path:["";"";"a"] + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~path:["a";"";"a"] + ip_url_syntax) +;; + + +let t037 () = + (* It is not possible to have port numbers outside 0..65535 *) + + (* reference: *) + works + (fun () -> + make_url + ~host:"a" + ~port:1 + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~host:"a" + ~port:(-1) + ip_url_syntax) && + + expect_malformed_url + (fun () -> + make_url + ~host:"a" + ~port:65536 + ip_url_syntax) +;; + + +let t038 () = + (* Several cases which are not allowed. 
*) + + expect_malformed_url + (fun () -> + make_url + ~host:"a" + ~path:["a"] + ip_url_syntax + ) && (* illegal: host + relative path *) + + expect_malformed_url + (fun () -> + make_url + ~host:"a" + ~path:[] + ~param:["x"] + ip_url_syntax + ) && (* illegal: host + no path + params *) + + expect_malformed_url + (fun () -> + make_url + ~host:"a" + ~path:[] + ~query:"x" + ip_url_syntax + ) (* illegal: host + no path + query *) +;; + +(**********************************************************************) +(* url_of_string *) +(**********************************************************************) + +let t050 () = + (* absolute URLs with ip_url_syntax *) + let identical s = + string_of_url (url_of_string ip_url_syntax s) = s in + + let fails s = + try ignore(url_of_string ip_url_syntax s); false + with Malformed_URL -> true + in + + identical "http:" && + + identical "http://host" && + identical "http://user@host" && + identical "http://user:password@host" && + identical "http://user@host:99" && + identical "http://user:password@host:99" && + + identical "http://host/" && + identical "http://user@host/" && + identical "http://user:password@host/" && + identical "http://user@host:99/" && + identical "http://user:password@host:99/" && + + identical "http://host/a/b" && + identical "http://user@host/a/b" && + identical "http://user:password@host/a/b" && + identical "http://user@host:99/a/b" && + identical "http://user:password@host:99/a/b" && + + identical "http://host/a/b/" && + identical "http://user@host/a/b/" && + identical "http://user:password@host/a/b/" && + identical "http://user@host:99/a/b/" && + identical "http://user:password@host:99/a/b/" && + + identical "http://host/?a=b&c=d" && + identical "http://user@host/?a=b&c=d" && + identical "http://user:password@host/?a=b&c=d" && + identical "http://user@host:99/?a=b&c=d" && + identical "http://user:password@host:99/?a=b&c=d" && + + fails "http://host?a=b&c=d" && + fails "http://user@host?a=b&c=d" && + fails 
"http://user:password@host?a=b&c=d" && + fails "http://user@host:99?a=b&c=d" && + fails "http://user:password@host:99?a=b&c=d" && + + identical "http://host/?a=/&c=/" && + identical "http://user@host/?a=/&c=/" && + identical "http://user:password@host/?a=/&c=/" && + identical "http://user@host:99/?a=/&c=/" && + identical "http://user:password@host:99/?a=/&c=/" && + + identical "http://host/;a;b" && + identical "http://user@host/;a;b" && + identical "http://user:password@host/;a;b" && + identical "http://user@host:99/;a;b" && + identical "http://user:password@host:99/;a;b" && + + fails "http://host;a;b" && + fails "http://user@host;a;b" && + fails "http://user:password@host;a;b" && + fails "http://user@host:99;a;b" && + fails "http://user:password@host:99;a;b" && + + identical "http://host/;a;b?a=b&c=d" && + identical "http://user@host/;a;b?a=b&c=d" && + identical "http://user:password@host/;a;b?a=b&c=d" && + identical "http://user@host:99/;a;b?a=b&c=d" && + identical "http://user:password@host:99/;a;b?a=b&c=d" && + + identical "http:#f" && + + identical "http://host#f" && + identical "http://user@host#f" && + identical "http://user:password@host#f" && + identical "http://user@host:99#f" && + identical "http://user:password@host:99#f" && + + identical "http://host/;a;b?a=b&c=d#f" && + identical "http://user@host/;a;b?a=b&c=d#f" && + identical "http://user:password@host/;a;b?a=b&c=d#f" && + identical "http://user@host:99/;a;b?a=b&c=d#f" && + identical "http://user:password@host:99/;a;b?a=b&c=d#f" && + + true +;; + + +let t051 () = + (* relative URLs with ip_url_syntax *) + let identical s = + string_of_url (url_of_string ip_url_syntax s) = s in + + let fails s = + try ignore(url_of_string ip_url_syntax s); false + with Malformed_URL -> true + in + + identical "//host" && + identical "//user@host" && + identical "//user:password@host" && + identical "//user@host:99" && + identical "//user:password@host:99" && + + identical "//host/" && + identical "//user@host/" && + 
identical "//user:password@host/" && + identical "//user@host:99/" && + identical "//user:password@host:99/" && + + identical "//host#f" && + identical "//user@host#f" && + identical "//user:password@host#f" && + identical "//user@host:99#f" && + identical "//user:password@host:99#f" && + + identical "/" && + identical "/a" && + identical "/a/" && + identical "/a/a" && + + identical "/;a;b" && + identical "/a;a;b" && + identical "/a/;a;b" && + identical "/a/a;a;b" && + + identical "/?a=b&c=d" && + identical "/a?a=b&c=d" && + identical "/a/?a=b&c=d" && + identical "/a/a?a=b&c=d" && + + identical "/;a;b?a=b&c=d" && + identical "/a;a;b?a=b&c=d" && + identical "/a/;a;b?a=b&c=d" && + identical "/a/a;a;b?a=b&c=d" && + + identical "/#f" && + identical "/a#f" && + identical "/a/#f" && + identical "/a/a#f" && + + identical "/;a;b#f" && + identical "/a;a;b#f" && + identical "/a/;a;b#f" && + identical "/a/a;a;b#f" && + + identical "/;a;b?a=b&c=d#f" && + identical "/a;a;b?a=b&c=d#f" && + identical "/a/;a;b?a=b&c=d#f" && + identical "/a/a;a;b?a=b&c=d#f" && + + identical "" && + identical "a" && + identical "a/" && + identical "a/a" && + + identical ";a;b" && + identical "a;a;b" && + identical "a/;a;b" && + identical "a/a;a;b" && + + identical "?a=b&c=d" && + identical "a?a=b&c=d" && + identical "a/?a=b&c=d" && + identical "a/a?a=b&c=d" && + + identical ";a;b?a=b&c=d" && + identical "a;a;b?a=b&c=d" && + identical "a/;a;b?a=b&c=d" && + identical "a/a;a;b?a=b&c=d" && + + identical "#f" && + identical "a#f" && + identical "a/#f" && + identical "a/a#f" && + + identical ";a;b#f" && + identical "a;a;b#f" && + identical "a/;a;b#f" && + identical "a/a;a;b#f" && + + identical ";a;b?a=b&c=d#f" && + identical "a;a;b?a=b&c=d#f" && + identical "a/;a;b?a=b&c=d#f" && + identical "a/a;a;b?a=b&c=d#f" && + + identical "." 
&& + identical "./" && + identical "./a" && + + identical ".;a;b" && + identical "./;a;b" && + identical "./a;a;b" && + + identical ".?a=b&c=d" && + identical "./?a=b&c=d" && + identical "./a?a=b&c=d" && + + identical ".;a;b?a=b&c=d" && + identical "./;a;b?a=b&c=d" && + identical "./a;a;b?a=b&c=d" && + + identical ".#f" && + identical "./#f" && + identical "./a#f" && + + identical ".;a;b#f" && + identical "./;a;b#f" && + identical "./a;a;b#f" && + + identical ".;a;b?a=b&c=d#f" && + identical "./;a;b?a=b&c=d#f" && + identical "./a;a;b?a=b&c=d#f" && + + identical ".." && + identical "../" && + identical "../a" && + + identical "..;a;b" && + identical "../;a;b" && + identical "../a;a;b" && + + identical "..?a=b&c=d" && + identical "../?a=b&c=d" && + identical "../a?a=b&c=d" && + + identical "..;a;b?a=b&c=d" && + identical "../;a;b?a=b&c=d" && + identical "../a;a;b?a=b&c=d" && + + identical "..#f" && + identical "../#f" && + identical "../a#f" && + + identical "..;a;b#f" && + identical "../;a;b#f" && + identical "../a;a;b#f" && + + identical "..;a;b?a=b&c=d#f" && + identical "../;a;b?a=b&c=d#f" && + identical "../a;a;b?a=b&c=d#f" && + + string_of_url + (make_url ~path:["a:b"] ip_url_syntax) = "a%3Ab" && + + string_of_url + (make_url ~encoded:true ~path:["a:b"] ip_url_syntax) = "./a:b" && + + true +;; + + +let t052 () = + (* mailto: URLs *) + let mailto_syn = Hashtbl.find common_url_syntax "mailto" in + + let identical s = + string_of_url (url_of_string mailto_syn s) = s in + + let fails s = + try ignore(url_of_string mailto_syn s); false + with Malformed_URL -> true + in + + identical "mailto:user@host" && + identical "mailto:user@host;?;?" 
&& + fails "mailto:user@host#f" +;; + +(**********************************************************************) +(* split_path/join_path/norm_path: *) +(**********************************************************************) + +let t060 () = + (split_path "" = []) && + (split_path "/" = [ "" ]) && + (split_path "/a" = [ ""; "a" ]) && + (split_path "a" = [ "a" ]) && + (split_path "a/" = [ "a"; "" ]) && + (split_path "/a/" = [ ""; "a"; "" ]) && + (split_path "/a/b" = [ ""; "a"; "b" ]) && + (split_path "/a/b/" = [ ""; "a"; "b"; "" ]) && + (split_path "/a/b/c" = [ ""; "a"; "b"; "c" ]) && + + (join_path [] = "") && + (join_path [ "" ] = "/") && + (join_path [ ""; "a" ] = "/a") && + (join_path [ "a" ] = "a") && + (join_path [ "a"; "" ] = "a/") && + (join_path [ ""; "a"; "" ] = "/a/") && + (join_path [ ""; "a"; "b" ] = "/a/b") && + (join_path [ ""; "a"; "b"; "" ] = "/a/b/") && + (join_path [ ""; "a"; "b"; "c" ] = "/a/b/c") && + + true +;; + + +let t061 () = + (norm_path ["."] = []) && + (norm_path ["."; ""] = []) && + (norm_path ["a"; "."] = ["a"; ""]) && + (norm_path ["a"; "b"; "."] = ["a"; "b"; ""]) && + (norm_path ["a"; "b"; ".."] = ["a"; ""]) && + (norm_path ["a"; "."; "b"; "."] = ["a"; "b"; ""]) && + (norm_path [".."] = [".."; ""]) && + (norm_path [".."; ""] = [".."; ""]) && + (norm_path ["a"; "b"; ".."; "c" ] = ["a"; "c"]) && + (norm_path ["a"; "b"; ".."; "c"; ""] = ["a"; "c"; ""]) && + (norm_path ["";"";"a";"";"b"] = [""; "a"; "b"]) && + (norm_path ["a"; "b"; ""; ".."; "c"; ""] = ["a"; "c"; ""]) && + (norm_path ["a"; ".."] = []) && + (norm_path ["";""] = [""]) && + (norm_path [""] = [""]) && + (norm_path [] = []) && + + true +;; + +(**********************************************************************) +(* apply_relative_url: *) +(**********************************************************************) + +let t070() = + (* Examples taken from RFC 1808 *) + let url = url_of_string ip_url_syntax in + let base = url "http://a/b/c/d;p?q#f" in + let aru = 
apply_relative_url base in + + (aru (url "g:h") = url "g:h") && + (aru (url "g") = url "http://a/b/c/g") && + (aru (url "./g") = url "http://a/b/c/g") && + (aru (url "g/") = url "http://a/b/c/g/") && + (aru (url "/g") = url "http://a/g") && + (aru (url "//g") = url "http://g") && + (aru (url "?y") = url "http://a/b/c/d;p?y") && + (aru (url "g?y") = url "http://a/b/c/g?y") && + (aru (url "g?y/./x") = url "http://a/b/c/g?y/./x") && + (aru (url "#s") = url "http://a/b/c/d;p?q#s") && + (aru (url "g#s") = url "http://a/b/c/g#s") && + (aru (url "g#s/./x") = url "http://a/b/c/g#s/./x") && + (aru (url "g?y#s") = url "http://a/b/c/g?y#s") && + (aru (url ";x") = url "http://a/b/c/d;x") && + (aru (url "g;x") = url "http://a/b/c/g;x") && + (aru (url "g;x?y#s") = url "http://a/b/c/g;x?y#s") && + (aru (url ".") = url "http://a/b/c/") && + (aru (url "./") = url "http://a/b/c/") && + (aru (url "..") = url "http://a/b/") && + (aru (url "../") = url "http://a/b/") && + (aru (url "../g") = url "http://a/b/g") && + (aru (url "../..") = url "http://a/") && + (aru (url "../../") = url "http://a/") && + (aru (url "../../g") = url "http://a/g") && + + (aru (url "") = url "http://a/b/c/d;p?q#f") && + (aru (url "../../../g") = url "http://a/../g") && + (aru (url "../../../../g") = url "http://a/../../g") && + (aru (url "/./g") = url "http://a/./g") && + (aru (url "/../g") = url "http://a/../g") && + (aru (url "g.") = url "http://a/b/c/g.") && + (aru (url ".g") = url "http://a/b/c/.g") && + (aru (url "g..") = url "http://a/b/c/g..") && + (aru (url "..g") = url "http://a/b/c/..g") && + (aru (url "./../g") = url "http://a/b/g") && + (aru (url "./g/.") = url "http://a/b/c/g/") && + (aru (url "g/./h") = url "http://a/b/c/g/h") && + (aru (url "g/../h") = url "http://a/b/c/h") && + (aru (url "http:g") = url "http:g") && + (aru (url "http:") = url "http:") && + + true +;; + + +(**********************************************************************) + +let test f n = + if f() then + print_endline 
("Test " ^ n ^ " ok") + else + print_endline ("Test " ^ n ^ " FAILED!!!!"); + flush stdout +;; + +test t001 "001"; +test t002 "002"; + +test t010 "010"; +test t011 "011"; +test t012 "012"; + +test t020 "020"; +test t021 "021"; + +test t030 "030"; +test t031 "031"; +test t032 "032"; +test t033 "033"; +test t034 "034"; +test t035 "035"; +test t036 "036"; +test t037 "037"; +test t038 "038"; + +test t050 "050"; +test t051 "051"; +test t052 "052"; + +test t060 "060"; +test t061 "061"; + +test t070 "070"; +() +;; diff --git a/helm/DEVEL/pxp/netstring/tests/test_recode.ml b/helm/DEVEL/pxp/netstring/tests/test_recode.ml new file mode 100644 index 000000000..64a04caae --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tests/test_recode.ml @@ -0,0 +1,169 @@ + + +let make_iso enc = + let s = ref "" in + for i = 0 to 255 do + let u = try Netconversion.makechar (enc :> Netconversion.encoding) i + with Not_found -> "" in + s := !s ^ u + done; + !s +;; + +let make_ucs2 start stop = + let s = String.create ((stop - start) * 2) in + for i = 0 to stop-start-1 do + let k = 2 * i in + let c = i + start in + s.[k] <- Char.chr(c lsr 8); + s.[k+1] <- Char.chr(c land 0xff); + done; + s +;; + +let make_ucs4 start stop = + let s = String.create ((stop - start) * 4) in + for i = 0 to stop-start-1 do + let k = 4 * i in + let c = i + start in + s.[k] <- Char.chr(c lsr 24); + s.[k+1] <- Char.chr((c lsr 16) land 0xff); + s.[k+2] <- Char.chr((c lsr 8) land 0xff); + s.[k+3] <- Char.chr(c land 0xff); + done; + s +;; + +let name_of_encoding enc = + match enc with + `Enc_iso88591 -> "ISO_8859-1" + | `Enc_iso88592 -> "ISO_8859-2" + | `Enc_iso88593 -> "ISO_8859-3" + | `Enc_iso88594 -> "ISO_8859-4" + | `Enc_iso88595 -> "ISO_8859-5" + | `Enc_iso88596 -> "ISO_8859-6" + | `Enc_iso88597 -> "ISO_8859-7" + | `Enc_iso88598 -> "ISO_8859-8" + | `Enc_iso88599 -> "ISO_8859-9" + | `Enc_iso885910 -> "ISO_8859-10" + | `Enc_iso885913 -> "ISO_8859-13" + | `Enc_iso885914 -> "ISO_8859-14" + | `Enc_iso885915 -> "ISO_8859-15" + 
| `Enc_utf8 -> "UTF-8" + | `Enc_ucs4 -> "UCS-4" + | `Enc_ucs2 -> "UCS-2" + | `Enc_utf16 -> "UTF-16" + + (* Note: GNU-iconv assumes big endian byte order *) +;; + +let iconv_recode_string in_enc out_enc in_s = + let in_enc_name = name_of_encoding in_enc in + let out_enc_name = name_of_encoding out_enc in + let out_s = ref "" in + + let out_ch,in_ch = Unix.open_process ("iconv -f " ^ in_enc_name ^ " -t " ^ + out_enc_name) in + (* Write in_s to in_ch in a new thread: *) + ignore + (Thread.create + (fun () -> + output_string in_ch in_s; + close_out in_ch; + ) + () + ); + (* Read the result in the current thread: *) + let buf = String.create 1024 in + let n = ref 1 in + while !n <> 0 do + let n' = input out_ch buf 0 1024 in + out_s := !out_s ^ String.sub buf 0 n'; + n := n' + done; + ignore(Unix.close_process (out_ch,in_ch)); + !out_s +;; + +let test_iso_and_utf8 enc = + let name = name_of_encoding enc in + print_string ("Recode: " ^ name ^ " and UTF-8... "); flush stdout; + let s = make_iso enc in + let s1' = Netconversion.recode_string (enc :> Netconversion.encoding) + `Enc_utf8 s in + let s2' = iconv_recode_string enc `Enc_utf8 s in + assert(s1' = s2'); + let s1 = Netconversion.recode_string `Enc_utf8 + (enc :> Netconversion.encoding) s1' in + let s2 = iconv_recode_string `Enc_utf8 enc s1' in + assert(s1 = s2 && s1 = s); + print_endline "OK"; flush stdout +;; + +let test_utf16_and_utf8_0000_d7ff () = + print_string "Recode: UTF-16-BE and UTF-8, #0000-#D7FF... "; + flush stdout; + let s = make_ucs2 0 0xd800 in + let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in + let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in + assert(s1' = s2'); + let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in + let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in + assert(s1 = s2 && s1 = s); + print_endline "OK"; flush stdout +;; + +let test_utf16_and_utf8_e000_fffd () = + print_string "Recode: UTF-16-BE and UTF-8, #E000-#FFFD... 
"; + flush stdout; + let s = make_ucs2 0xe000 0xfffe in + let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in + let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in + assert(s1' = s2'); + let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in + let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in + assert(s1 = s2 && s1 = s); + print_endline "OK"; flush stdout +;; + +let test_utf16_and_utf8_10000_10FFFF () = + print_string "Recode: UTF-16-BE and UTF-8, #10000-#10FFFF... "; + flush stdout; + for i = 1 to 16 do + let s0 = make_ucs4 (i * 0x10000) (i * 0x10000 + 0x10000) in + let s = iconv_recode_string `Enc_ucs4 `Enc_utf16 s0 in + let s1' = Netconversion.recode_string `Enc_utf16_be `Enc_utf8 s in + let s2' = iconv_recode_string `Enc_utf16 `Enc_utf8 s in + assert(s1' = s2'); + let s1 = Netconversion.recode_string `Enc_utf8 `Enc_utf16_be s1' in + let s2 = iconv_recode_string `Enc_utf8 `Enc_utf16 s1' in + assert(s1 = s2 && s1 = s); + print_string "+"; flush stdout; + done; + print_endline "OK"; flush stdout +;; + + +print_endline "Warning: You need the command 'iconv' to run this test!"; +flush stdout; +test_iso_and_utf8 `Enc_iso88591; +test_iso_and_utf8 `Enc_iso88592; +test_iso_and_utf8 `Enc_iso88593; +test_iso_and_utf8 `Enc_iso88594; +test_iso_and_utf8 `Enc_iso88595; +test_iso_and_utf8 `Enc_iso88596; +test_iso_and_utf8 `Enc_iso88597; +(* test_iso_and_utf8 `Enc_iso88598; *) +test_iso_and_utf8 `Enc_iso88599; +test_iso_and_utf8 `Enc_iso885910; +(* test_iso_and_utf8 `Enc_iso885913; *) +(* test_iso_and_utf8 `Enc_iso885914; *) +(* test_iso_and_utf8 `Enc_iso885915; *) +test_utf16_and_utf8_0000_d7ff(); +test_utf16_and_utf8_e000_fffd(); +(* This test does not work because iconv does not support the surrogate + * representation of UTF-16: + * test_utf16_and_utf8_10000_10FFFF(); + *) +() +;; diff --git a/helm/DEVEL/pxp/netstring/tools/Makefile b/helm/DEVEL/pxp/netstring/tools/Makefile new file mode 100644 index 000000000..b3c148db7 --- /dev/null 
+++ b/helm/DEVEL/pxp/netstring/tools/Makefile @@ -0,0 +1,10 @@ +all: + $(MAKE) -C unimap_to_ocaml + +clean: + +CLEAN: clean + $(MAKE) -C unimap_to_ocaml CLEAN + +distclean: clean + $(MAKE) -C unimap_to_ocaml distclean diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore new file mode 100644 index 000000000..c1fcbc4ae --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/.cvsignore @@ -0,0 +1,7 @@ +*.cmo +*.cmx +*.cmi + +*.o +*.a + diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile new file mode 100644 index 000000000..ed4277389 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/Makefile @@ -0,0 +1,15 @@ +all: unimap_to_ocaml + +unimap_to_ocaml: unimap_to_ocaml.ml + ocamlfind ocamlc -g -package str -linkpkg -custom \ + -o unimap_to_ocaml \ + unimap_to_ocaml.ml + +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa + +CLEAN: clean + +distclean: clean + rm -f *~ unimap_to_ocaml + diff --git a/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml new file mode 100644 index 000000000..14a89e9d9 --- /dev/null +++ b/helm/DEVEL/pxp/netstring/tools/unimap_to_ocaml/unimap_to_ocaml.ml @@ -0,0 +1,201 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Printf;; + +let comment_re = Str.regexp "#.*$";; +let space_re = Str.regexp "[ \t\r\n]+";; + +let read_unimap_format_a fname f = + (* Reads a Unicode mapping in format A from a "local" code to Unicode. + * Returns a list of pairs (localcode, unicode). 
+ *) + + let read_unimap_line() = + let s = input_line f in (* may raise End_of_file *) + let s' = Str.global_replace comment_re "" s in + let words = Str.split space_re s' in + match words with + [] -> raise Not_found + | [ localcode; unicode ] -> + int_of_string localcode, int_of_string unicode + | _ -> + failwith ("File " ^ fname ^ ": Do not know what to do with:\n" ^ s') + in + + let rec read_following_lines() = + try + let localcode, unicode = read_unimap_line() in + (* may raise End_of_file, Not_found *) + (localcode, unicode) :: read_following_lines() + with + Not_found -> read_following_lines() + | End_of_file -> [] + in + + read_following_lines() +;; + + +type from_uni_list = + U_nil + | U_single of (int * int) + | U_list of (int * int) list + +type from_unicode = + from_uni_list array;; + (* A hashtable with fixed size (256). A pair (unicode, localcode) is + * stored at the position unicode mod 256 in the array. + *) + + +let make_bijection unimap = + (* unimap: a list of pairs (localcode, unicode) + * returns a pair of arrays (m_to_unicode, m_from_unicode) with: + * - m_to_unicode.(localcode) = Some unicode, + * if the pair (localcode, unicode) exists + * m_to_unicode.(x) = None otherwise + * - m_from_unicode.(unicode land 255) = [ ...; (unicode,localcode); ... 
] + *) + + let m_to_unicode = Array.create 256 None in + let m_from_unicode = Array.create 256 [] in + + List.iter + (fun (localcode, unicode) -> + assert(localcode < 256); + + (* Update m_to_unicode: *) + if m_to_unicode.(localcode) <> None then + failwith ("Local code point " ^ string_of_int localcode ^ + " mapped twice"); + m_to_unicode.(localcode) <- Some unicode; + + (* Update m_from_unicode: *) + let unilow = unicode land 255 in + if List.mem_assoc unicode (m_from_unicode.(unilow)) then + failwith ("Unicode code point " ^ string_of_int unicode ^ + " mapped twice"); + m_from_unicode.(unilow) <- + m_from_unicode.(unilow) @ [unicode,localcode]; + ) + unimap; + + m_to_unicode, m_from_unicode +;; + + +let to_unimap_as_string to_unimap = + let make_repr x = + match x with + None -> -1 + | Some u -> u + in + Marshal.to_string (Array.map make_repr to_unimap) [ Marshal.No_sharing ] +;; + + +let from_unimap_as_string from_unimap = + let make_repr l = + match l with + [] -> U_nil + | [u,l] -> U_single(u,l) + | _ -> U_list l + in + let m = Array.map make_repr from_unimap in + Marshal.to_string m [ Marshal.No_sharing ] +;; + + +let print_bijection f name m_to_unicode m_from_unicode = + (* Prints on file f this O'Caml code: + * let _to_unicode = ... + * let _from_unicode = ... 
+ *) + fprintf f "let %s_to_unicode = lazy (Marshal.from_string \"%s\" 0 : int array);;\n" + name + (String.escaped (to_unimap_as_string m_to_unicode)); + + fprintf f "let %s_from_unicode = lazy (Marshal.from_string \"%s\" 0 : Netmappings.from_uni_list array);;\n " + name + (String.escaped (from_unimap_as_string m_from_unicode)); +;; + + +let main() = + let files = ref [] in + let outch = ref (lazy stdout) in + Arg.parse + [ "-o", Arg.String (fun s -> outch := lazy (open_out s)), + " Write result to this file"] + (fun s -> files := !files @ [s]) + "usage: unimap_to_ocaml file.unimap ..."; + + (* First read in all unimaps: *) + let unimaps = + List.map + (fun filename -> + let mapname = Str.replace_first (Str.regexp "\.unimap$") "" + (Filename.basename filename) in + let f = open_in filename in + prerr_endline ("Reading " ^ filename); + let unimap = read_unimap_format_a filename f in + close_in f; + mapname, unimap + ) + !files + in + + (* Second compute all bijections: *) + let bijections = + List.map + (fun (mapname, unimap) -> + prerr_endline ("Processing " ^ mapname); + let to_unicode, from_unicode = make_bijection unimap in + mapname, to_unicode, from_unicode + ) + unimaps + in + + let out = Lazy.force !outch in + (* Third output all results: *) + output_string out "(* WARNING! This is a generated file! 
*)\n"; + + List.iter + (fun (mapname, to_unicode, from_unicode) -> + print_bijection out mapname to_unicode from_unicode) + bijections; + List.iter + (fun (mapname, _, _) -> + fprintf out "Hashtbl.add Netmappings.to_unicode `Enc_%s %s_to_unicode;\n" + mapname mapname; + fprintf out "Hashtbl.add Netmappings.from_unicode `Enc_%s %s_from_unicode;\n" + mapname mapname; + ) + (List.rev bijections); + fprintf out "();;\n"; + + close_out out +;; + + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:29 lpadovan + * Initial revision + * + * Revision 1.3 2000/08/29 00:48:52 gerd + * Conversion tables are now stored in marshalled form. + * New type for the conversion table Unicode to 8bit. + * + * Revision 1.2 2000/08/12 23:54:56 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/.cvsignore b/helm/DEVEL/pxp/pxp/.cvsignore new file mode 100644 index 000000000..deb5b7fba --- /dev/null +++ b/helm/DEVEL/pxp/pxp/.cvsignore @@ -0,0 +1,4 @@ +*.cmo +*.cmx +*.cmi + diff --git a/helm/DEVEL/pxp/pxp/LICENSE b/helm/DEVEL/pxp/pxp/LICENSE new file mode 100644 index 000000000..55182a74d --- /dev/null +++ b/helm/DEVEL/pxp/pxp/LICENSE @@ -0,0 +1,22 @@ +Copyright 1999 by Gerd Stolpmann + +The package "markup" is copyright by Gerd Stolpmann. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this document and the "markup" software (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. 
+ +The Software is provided ``as is'', without warranty of any kind, express +or implied, including but not limited to the warranties of +merchantability, fitness for a particular purpose and noninfringement. +In no event shall Gerd Stolpmann be liable for any claim, damages or +other liability, whether in an action of contract, tort or otherwise, +arising from, out of or in connection with the Software or the use or +other dealings in the software. diff --git a/helm/DEVEL/pxp/pxp/META b/helm/DEVEL/pxp/pxp/META new file mode 100644 index 000000000..020128a0d --- /dev/null +++ b/helm/DEVEL/pxp/pxp/META @@ -0,0 +1,20 @@ +version = "1.0" +requires = "netstring" +description = "Validating parser for XML-1.0" +archive(byte) = "pxp_types.cma + pxp_lex_iso88591.cma + pxp_lex_utf8.cma + pxp_engine.cma + pxp_utf8.cmo" +archive(byte, pxp_without_utf8) = "pxp_types.cma + pxp_lex_iso88591.cma + pxp_engine.cma" +archive(native) = "pxp_types.cmxa + pxp_lex_iso88591.cmxa + pxp_lex_utf8.cmxa + pxp_engine.cmxa + pxp_utf8.cmx" +archive(native, pxp_without_utf8) = "pxp_types.cmxa + pxp_lex_iso88591.cmxa + pxp_engine.cmxa" + diff --git a/helm/DEVEL/pxp/pxp/Makefile b/helm/DEVEL/pxp/pxp/Makefile new file mode 100644 index 000000000..f08eab99d --- /dev/null +++ b/helm/DEVEL/pxp/pxp/Makefile @@ -0,0 +1,105 @@ +# make all: make bytecode archive +# make opt: make native archive +# make install: install bytecode archive, and if present, native archive +# make uninstall: uninstall package +# make clean: remove intermediate files (in this directory) +# make CLEAN: remove intermediate files (recursively) +# make distclean: remove any superflous files (recursively) +# make release: cleanup, create archive, tag CVS module +# (for developers) + +#---------------------------------------------------------------------- + +include Makefile.conf + +.PHONY: all +all: + $(MAKE) -C m2parsergen all + $(MAKE) -C tools/ucs2_to_utf8 all + $(MAKE) -f Makefile.code all + $(MAKE) -C compatibility all + +.PHONY: 
opt +opt: + $(MAKE) -C m2parsergen all + $(MAKE) -C tools/ucs2_to_utf8 all + $(MAKE) -f Makefile.code opt + $(MAKE) -C compatibility opt + +.PHONY: install +install: all tmp/pxp_entity.mli + files=`tools/collect_files *.cmi *.cma *.cmxa *.a \ + pxp_utf8.cmo pxp_utf8.cmx pxp_utf8.o` && \ + ocamlfind install $(NAME) $(MLI) tmp/pxp_entity.mli $$files META + +.PHONY: uninstall +uninstall: + ocamlfind remove $(NAME) + +.PHONY: markup-install +markup-install: + $(MAKE) -C compatibility install + +.PHONY: markup-uninstall +markup-uninstall: + $(MAKE) -C compatibility uninstall + +tmp/pxp_entity.mli: pxp_entity.ml + mkdir -p tmp + rm -f tmp/pxp_entity.* + cp pxp_entity.ml tmp + echo '(* Sorry, this is currently undocumented *)' >tmp/mli + ocamlc -i -c tmp/pxp_entity.ml >>tmp/mli + mv tmp/mli tmp/pxp_entity.mli + +.PHONY: clean +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old + rm -f pxp_yacc.ml + touch lexers/objects_iso88591 lexers/objects_utf8 lexers/depend + $(MAKE) -C lexers clean + $(MAKE) -C compatibility clean + +.PHONY: CLEAN +CLEAN: clean + $(MAKE) -C doc CLEAN + $(MAKE) -C examples CLEAN + $(MAKE) -C rtests CLEAN + $(MAKE) -C m2parsergen CLEAN + touch tools/ucs2_to_utf8/depend + $(MAKE) -C tools/ucs2_to_utf8 clean + +.PHONY: distclean +distclean: clean + rm -f *~ depend depend.pkg + $(MAKE) -C doc distclean + $(MAKE) -C examples distclean + $(MAKE) -C rtests distclean + $(MAKE) -C m2parsergen distclean + touch tools/ucs2_to_utf8/depend + $(MAKE) -C tools/ucs2_to_utf8 clean + $(MAKE) -C compatibility distclean + +RELEASE: META + awk '/version/ { print substr($$3,2,length($$3)-2) }' META >RELEASE + +.PHONY: dist +dist: RELEASE + r=`head -1 RELEASE`; cd ..; gtar czf $(NAME)-$$r.tar.gz --exclude='*/CVS*' --exclude="*~" --exclude="*/depend.pkg" --exclude="*/depend" --exclude="*/oo_questions*" --exclude="*/testsamples*" --exclude="*/tmp/*" --exclude="*reptil*" --exclude="*/doc/common.xml" --exclude="*/doc/config.xml" --exclude="*.fig.bak" 
--exclude="*/ps/pic*" --exclude="*/examples/panel*" --exclude="*/examples/xmlforms_gtk*" --exclude="*/Mail*" $(NAME)/* + +.PHONY: tag-release +tag-release: RELEASE + r=`head -1 RELEASE | sed -e s/\\\./-/g`; cd ..; cvs tag -F $(NAME)-$$r markup + +.PHONY: release +release: distclean + $(MAKE) tag-release + $(MAKE) dist + +.PHONY: dev +dev: + $(MAKE) all + -$(MAKE) uninstall + $(MAKE) install + $(MAKE) -C examples/validate distclean + $(MAKE) -C examples/validate validate diff --git a/helm/DEVEL/pxp/pxp/Makefile.code b/helm/DEVEL/pxp/pxp/Makefile.code new file mode 100644 index 000000000..3afed39ca --- /dev/null +++ b/helm/DEVEL/pxp/pxp/Makefile.code @@ -0,0 +1,96 @@ +# make all: make bytecode archives +# make opt: make native archives +#---------------------------------------------------------------------- + +include Makefile.conf + +all: + $(MAKE) -f Makefile.code pxp_types.cma + $(MAKE) -f Makefile.code pxp_lex_iso88591.cma + if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cma; else rm -f pxp_lex_utf8.cma; fi + $(MAKE) -f Makefile.code pxp_engine.cma + if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmo; else rm -f pxp_utf8.cmo; fi + +opt: + $(MAKE) -f Makefile.code pxp_types.cmxa + $(MAKE) -f Makefile.code pxp_lex_iso88591.cmxa + if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_lex_utf8.cmxa; else rm -f pxp_lex_utf8.cmxa; fi + $(MAKE) -f Makefile.code pxp_engine.cmxa + if [ "x$(UTF8_SUPPORT)" = "xyes" ]; then $(MAKE) -f Makefile.code pxp_utf8.cmx; else rm -f pxp_utf8.cmx; fi + +#---------------------------------------------------------------------- + +pxp_types.cma: $(OBJECTS_types) + $(OCAMLC) -a -o pxp_types.cma $(OBJECTS_types) + +pxp_types.cmxa: $(XOBJECTS_types) + $(OCAMLOPT) -a -o pxp_types.cmxa $(XOBJECTS_types) + +pxp_engine.cma: $(OBJECTS_engine) + $(OCAMLC) -a -o pxp_engine.cma $(OBJECTS_engine) + +pxp_engine.cmxa: $(XOBJECTS_engine) + $(OCAMLOPT) -a -o pxp_engine.cmxa 
$(XOBJECTS_engine) + + +# The following rules are "phony" to force 'make' to go into the +# "lexers" subdirectory. + +.PHONY: pxp_lex_iso88591.cma +pxp_lex_iso88591.cma: $(CMI_types) + $(MAKE) -C lexers all_iso88591 + cp lexers/pxp_lex_iso88591.cma . + +.PHONY: pxp_lex_iso88591.cmxa +pxp_lex_iso88591.cmxa: $(CMI_types) + $(MAKE) -C lexers opt_iso88591 + cp lexers/pxp_lex_iso88591.cmxa lexers/pxp_lex_iso88591.a . + +.PHONY: pxp_lex_utf8.cma +pxp_lex_utf8.cma: $(CMI_types) + $(MAKE) -C lexers all_utf8 + cp lexers/pxp_lex_utf8.cma . + +.PHONY: pxp_lex_utf8.cmxa +pxp_lex_utf8.cmxa: $(CMI_types) + $(MAKE) -C lexers opt_utf8 + cp lexers/pxp_lex_utf8.cmxa lexers/pxp_lex_utf8.a . + +#---------------------------------------------------------------------- +# general rules: + +OPTIONS = +OCAMLC = $(OCAMLFIND) ocamlc -package "$(PACKAGES)" \ + -g -I lexers $(OPTIONS) $(ROPTIONS) +OCAMLOPT = $(OCAMLFIND) ocamlopt -package "$(PACKAGES)" \ + -p -I lexers $(OPTIONS) $(ROPTIONS) +OCAMLDEP = ocamldep $(OPTIONS) +OCAMLFIND = ocamlfind + +depend: *.ml *.mli pxp_yacc.ml + $(OCAMLDEP) *.ml *.mli >depend + +.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .m2y + +.ml.cmx: + $(OCAMLOPT) -c $< + +.ml.cmo: + $(OCAMLC) -c $< + +.mli.cmi: + $(OCAMLC) -c $< + +.mll.ml: + ocamllex $< + +.m2y.ml: + ./m2parsergen/m2parsergen < $< >`basename $< .m2y`.ml || { rm -f `basename $< .m2y`.ml; false; } + +*.mli: + + +# Generated dependencies: + +include depend + diff --git a/helm/DEVEL/pxp/pxp/Makefile.conf b/helm/DEVEL/pxp/pxp/Makefile.conf new file mode 100644 index 000000000..749c702c7 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/Makefile.conf @@ -0,0 +1,37 @@ +# User-configurable section: + +# yes or no: Do you want that the parser has support for the internal +# representation as UTF-8 strings? "yes" is recommended, but the parser +# becomes much bigger +UTF8_SUPPORT = yes + +# --- End of User-configurable section. + +# Settings. 
+ +NAME = pxp +PACKAGES = netstring + +# Caml objects that are needed by the lexers: +OBJECTS_types = \ + pxp_types.cmo pxp_lexer_types.cmo + +CMI_types = $(OBJECTS_types:.cmo=.cmi) + +# Caml objects that depend on the lexers: +OBJECTS_engine = \ + pxp_lexers.cmo \ + pxp_dfa.cmo \ + pxp_aux.cmo pxp_reader.cmo \ + pxp_entity.cmo pxp_dtd.cmo pxp_document.cmo \ + pxp_yacc.cmo pxp_codewriter.cmo + +# Same as native objects: +XOBJECTS_types = $(OBJECTS_types:.cmo=.cmx) +XOBJECTS_engine = $(OBJECTS_engine:.cmo=.cmx) + +# .mli files to install: + +MLI = pxp_document.mli pxp_dtd.mli \ + pxp_types.mli pxp_yacc.mli \ + pxp_codewriter.mli pxp_dfa.mli diff --git a/helm/DEVEL/pxp/pxp/RELEASE b/helm/DEVEL/pxp/pxp/RELEASE new file mode 100644 index 000000000..d3827e75a --- /dev/null +++ b/helm/DEVEL/pxp/pxp/RELEASE @@ -0,0 +1 @@ +1.0 diff --git a/helm/DEVEL/pxp/pxp/compatibility/.cvsignore b/helm/DEVEL/pxp/pxp/compatibility/.cvsignore new file mode 100644 index 000000000..deb5b7fba --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/.cvsignore @@ -0,0 +1,4 @@ +*.cmo +*.cmx +*.cmi + diff --git a/helm/DEVEL/pxp/pxp/compatibility/META b/helm/DEVEL/pxp/pxp/compatibility/META new file mode 100644 index 000000000..441e30a0f --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/META @@ -0,0 +1,6 @@ +version = "PXP-emulator" +requires = "pxp" +description = "Validating parser for XML-1.0" +archive(byte) = "markup.cma" +archive(native) = "markup.cmxa" + diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile b/helm/DEVEL/pxp/pxp/compatibility/Makefile new file mode 100644 index 000000000..187116ccb --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile @@ -0,0 +1,40 @@ +# make all: make bytecode archive +# make opt: make native archive +# make install: install bytecode archive, and if present, native archive +# make uninstall: uninstall package +# make clean: remove intermediate files (in this directory) +# make CLEAN: remove intermediate files (recursively) +# make distclean: 
remove any superflous files (recursively) + +#---------------------------------------------------------------------- + +include Makefile.conf + +.PHONY: all +all: + $(MAKE) -f Makefile.code all + +.PHONY: opt +opt: + $(MAKE) -f Makefile.code opt + +.PHONY: install +install: all + files=`../tools/collect_files *.cmi *.cma *.cmxa *.a` && \ + ocamlfind install $(NAME) $(MLI) $$files META + +.PHONY: uninstall +uninstall: + ocamlfind remove $(NAME) + +.PHONY: clean +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa *.new *.old + +.PHONY: CLEAN +CLEAN: clean + +.PHONY: distclean +distclean: clean + rm -f *~ depend depend.pkg + diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.code b/helm/DEVEL/pxp/pxp/compatibility/Makefile.code new file mode 100644 index 000000000..2733faa09 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile.code @@ -0,0 +1,50 @@ +# make all: make bytecode archives +# make opt: make native archives +#---------------------------------------------------------------------- + +include Makefile.conf + +.PHONY: all +all: markup.cma + +.PHONY: opt +opt: markup.cmxa + +#---------------------------------------------------------------------- + +markup.cma: $(OBJECTS) + $(OCAMLC) -a -o markup.cma $(OBJECTS) + +markup.cmxa: $(XOBJECTS) + $(OCAMLOPT) -a -o markup.cmxa $(XOBJECTS) + +#---------------------------------------------------------------------- +# general rules: + +OPTIONS = +OCAMLC = ocamlfind ocamlc -g -I .. -package netstring $(OPTIONS) $(ROPTIONS) +OCAMLOPT = ocamlfind ocamlopt -p -I .. 
-package netstring $(OPTIONS) $(ROPTIONS) +OCAMLDEP = ocamldep $(OPTIONS) +OCAMLFIND = ocamlfind + +depend: *.ml *.mli + $(OCAMLDEP) *.ml *.mli >depend + +.SUFFIXES: .cmo .cmi .cmx .ml .mli + +.ml.cmx: + $(OCAMLOPT) -c $< + +.ml.cmo: + $(OCAMLC) -c $< + +.mli.cmi: + $(OCAMLC) -c $< + +*.mli: + + +# Generated dependencies: + +include depend + diff --git a/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf b/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf new file mode 100644 index 000000000..061d0cae1 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/Makefile.conf @@ -0,0 +1,9 @@ +NAME = markup + +OBJECTS = markup_types.cmo markup_dtd.cmo markup_reader.cmo \ + markup_document.cmo markup_yacc.cmo +XOBJECTS = $(OBJECTS:.cmo=.cmx) + +MLI = markup_document.mli markup_dtd.mli \ + markup_types.mli markup_yacc.mli markup_reader.mli + diff --git a/helm/DEVEL/pxp/pxp/compatibility/README b/helm/DEVEL/pxp/pxp/compatibility/README new file mode 100644 index 000000000..50086732a --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/README @@ -0,0 +1,21 @@ +This directory contains the modules for Markup-0.2.10 +compatibility. The modules consist mainly of wrapper classes for the +new PXP classes, and translate the old methods to the new ones. + +Please note that the compatibility is not perfect. Sometimes there are +new methods which do not exist in Markup-0.2.10, and sometimes even +existing methods changed their signature. I have tried to avoid that, +but there are some ugly cases which are hard to solve without such +modifications. + +Translating old methods into new methods costs time and +memory. Because of this, it is best to consider the compatibility +modules as migration path to PXP: You can test whether PXP parses your +input files, and you can compare the old API with the new API +directly. (However, it is hard to test new features of PXP with the +compatibility modules; the old API does not reflect the new features.) 
+ +The compatibility modules are currently maintained, but that will stop +once PXP has been established. + +(Gerd) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml new file mode 100644 index 000000000..bbc497953 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_document.ml @@ -0,0 +1,374 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +type node_type = + T_element of string + | T_data + +class type [ 'node ] extension = [ 'node ] Pxp_document.extension + +class type [ 'ext, 'node ] pxp_extension_type = +object ('self) + method clone : 'self + method node : 'self Pxp_document.node + method set_node : 'self Pxp_document.node -> unit + + method markup_node : 'node + method set_markup_node : 'node -> unit + + method set_index : 'self Pxp_yacc.index -> unit + method index : 'self Pxp_yacc.index + end +;; + + +class type [ 'ext ] node = + object ('self) + constraint 'ext = 'ext node #extension + method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node + + method extension : 'ext + method delete : unit + method parent : 'ext node + method root : 'ext node + method orphaned_clone : 'ext node + method orphaned_flat_clone : 'ext node + method add_node : 'ext node -> unit + method add_pinstr : Markup_dtd.proc_instruction -> unit + method pinstr : string -> Markup_dtd.proc_instruction list + method pinstr_names : string list + method sub_nodes : 'ext node list + method iter_nodes : ('ext node -> unit) -> unit + method iter_nodes_sibl : + ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit + method set_nodes : 'ext node list -> unit + method data : string + method node_type : node_type + method attribute : string -> Markup_types.att_value + method attribute_names : string list + method attribute_type : string -> Markup_types.att_type + method attributes : (string * Markup_types.att_value) list + method 
required_string_attribute : string -> string + method required_list_attribute : string -> string list + method optional_string_attribute : string -> string option + method optional_list_attribute : string -> string list + method quick_set_attributes : (string * Markup_types.att_value) list -> unit + method find : string -> 'ext node + method reset_finder : unit + method dtd : Markup_dtd.dtd + method create_element : + Markup_dtd.dtd -> node_type -> (string * string) list -> 'ext node + method create_data : Markup_dtd.dtd -> string -> 'ext node + method local_validate : unit + method keep_always_whitespace_mode : unit + method write_compact_as_latin1 : Markup_types.output_stream -> unit + method internal_adopt : 'ext node option -> unit + method internal_delete : 'ext node -> unit + method internal_init : Markup_dtd.dtd -> string -> (string * string) list -> unit + end +;; + + +class [ 'ext ] pxp_extension init_markup_node = + (object (self : 'self) + (* constraint 'ext = 'ext node #extension *) + val mutable pxp_node = (None : + 'self Pxp_document.node option) + (* 'ext pxp_extension Pxp_document.node option *) + val mutable markup_node = (init_markup_node : 'ext node) + + val mutable index = (None : 'self Pxp_yacc.index option) + + method clone = + {< >} + + method node = + match pxp_node with + None -> + assert false + | Some n -> n + + method set_node n = + pxp_node <- Some n + + method markup_node = markup_node + + method set_markup_node n = markup_node <- n + + method set_index ix = + index <- Some ix + + method index = + match index with + None -> assert false + | Some x -> x + + end + : ['ext, 'ext node] pxp_extension_type ) +;; + + +class [ 'ext ] emulate_markup_node init_ext init_pxp_node = + object (self) + constraint 'ext = 'ext node #extension + val mutable pxp_node = (init_pxp_node : + ('ext, 'ext #node) + pxp_extension_type Pxp_document.node option) + val mutable extension = (init_ext : 'ext) + + method pxp_node = + match pxp_node with + None -> 
assert false + | Some n -> n + + method extension = extension + method delete = self # pxp_node # delete + method parent = self # pxp_node # parent # extension # markup_node + method root = self # pxp_node # root # extension # markup_node + + method orphaned_clone = + let ext' = extension # clone in + let pxp' = self # pxp_node # orphaned_clone in + let n = new emulate_markup_node ext' (Some pxp') in + ext' # set_node (n : 'ext #node :> 'ext node); + pxp' # extension # set_markup_node n; + n + + method orphaned_flat_clone = + let ext' = extension # clone in + let pxp' = self # pxp_node # orphaned_flat_clone in + let n = new emulate_markup_node ext' (Some pxp') in + ext' # set_node (n : 'ext #node :> 'ext node); + pxp' # extension # set_markup_node n; + n + + method dtd = self # pxp_node # dtd + + method add_node (n : 'ext node) = + let n_pxp = n # pxp_node in + self # pxp_node # add_node n_pxp + + method add_pinstr pi = + self # pxp_node # add_pinstr pi + + method sub_nodes = + let l = self # pxp_node # sub_nodes in + List.map (fun n_pxp -> n_pxp # extension # markup_node) l + + method pinstr name = + self # pxp_node # pinstr name + + method pinstr_names = + self # pxp_node # pinstr_names + + method iter_nodes f = + self # pxp_node # iter_nodes + (fun n_pxp -> f (n_pxp # extension # markup_node)) + + method iter_nodes_sibl f = + self # pxp_node # iter_nodes_sibl + (fun left_pxp node_pxp right_pxp -> + let left = + match left_pxp with + None -> None + | Some n_pxp -> Some (n_pxp # extension # markup_node) in + let right = + match right_pxp with + None -> None + | Some n_pxp -> Some (n_pxp # extension # markup_node) in + let node = + node_pxp # extension # markup_node in + f left node right + ) + + method set_nodes (l : 'ext node list) = + let l_pxp = List.map (fun n -> n # pxp_node) l in + self # pxp_node # set_nodes l_pxp + + method data = self # pxp_node # data + + method node_type = + match self # pxp_node # node_type with + Pxp_document.T_data -> T_data + | 
Pxp_document.T_element name -> T_element name + | Pxp_document.T_super_root -> T_element "-vr" + | Pxp_document.T_pinstr _ -> T_element "-pi" + | _ -> assert false + + method attribute name = + self # pxp_node # attribute name + + method attribute_names = + self # pxp_node # attribute_names + + method attribute_type name = + self # pxp_node # attribute_type name + + method attributes = + self # pxp_node # attributes + + method required_string_attribute name = + self # pxp_node # required_string_attribute name + + method required_list_attribute name = + self # pxp_node # required_list_attribute name + + method optional_string_attribute name = + self # pxp_node # optional_string_attribute name + + method optional_list_attribute name = + self # pxp_node # optional_list_attribute name + + method quick_set_attributes l = + self # pxp_node # quick_set_attributes l + + method find (name : string) = + let index = self # root # pxp_node # extension # index in + let n = index # find name in (* may raise Not_found *) + n # extension # markup_node + + method reset_finder = () + + method create_element dtd nt atts = + let nt_pxp = + match nt with + T_data -> Pxp_document.T_data + | T_element name -> Pxp_document.T_element name in + let node_pxp = + self # pxp_node # create_element dtd nt_pxp atts in + let ext' = extension # clone in + let n = new emulate_markup_node ext' (Some node_pxp) in + ext' # set_node (n : 'ext #node :> 'ext node); + node_pxp # extension # set_markup_node n; + n + + method create_data dtd s = + let node_pxp = + self # pxp_node # create_data dtd s in + let ext' = extension # clone in + let n = new emulate_markup_node ext' (Some node_pxp) in + ext' # set_node (n : 'ext #node :> 'ext node); + node_pxp # extension # set_markup_node n; + n + + method keep_always_whitespace_mode = + self # pxp_node # keep_always_whitespace_mode + + method write_compact_as_latin1 out = + self # pxp_node # write_compact_as_latin1 out + + method local_validate = + self # pxp_node 
# local_validate() + + method internal_adopt (p:'ext node option) = + assert false; + () + + method internal_delete (n:'ext node) = + assert false; + () + + method internal_init (d:Markup_dtd.dtd) (s:string) (atts:(string*string)list) = + assert false; + () + end +;; + +class [ 'ext ] data_impl ext data = + object (self) + inherit [ 'ext ] emulate_markup_node ext None + constraint 'ext = 'ext node #extension + initializer + if data <> "" then + failwith "Emulation of Markup_document: Cannot instantiate data node with non-empty string"; + let self' = (self : 'ext #node :> 'ext node ) in + pxp_node <- Some (new Pxp_document.data_impl (new pxp_extension self')) + + end +;; + +class [ 'ext ] element_impl ext = + object (self) + inherit [ 'ext ] emulate_markup_node ext None + initializer + let self' = (self : 'ext #node :> 'ext node ) in + pxp_node <- Some (new Pxp_document.element_impl (new pxp_extension self')) + end +;; + + +class [ 'ext ] document w = + object (self) + val pxp_doc = new Pxp_document.document + (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings) + + val mutable standalone_flag = false + + method init_xml_version v = + pxp_doc # init_xml_version v + + method xml_version = + pxp_doc # xml_version + + method init_xml_standalone b = + standalone_flag <- b + + method xml_standalone = standalone_flag + + method init_root (r : 'ext node) = + pxp_doc # init_root (r # pxp_node); + self # dtd # set_standalone_declaration standalone_flag + (* questionable *) + + method root = + let pxp_root = pxp_doc # root in + pxp_root # extension # markup_node + + method dtd = + pxp_doc # dtd + + method add_pinstr pi = + pxp_doc # add_pinstr pi + + method pinstr name = + pxp_doc # pinstr name + + method pinstr_names = + pxp_doc # pinstr_names + + method write_compact_as_latin1 out = + pxp_doc # write_compact_as_latin1 out + + end +;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 
2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.6 2000/08/18 20:19:00 gerd + * Changed the emulation: there are now wrapper objects for nodes. + * This was necessary because node_type changed in PXP such that it became + * incompatible with Markup's node_type. + * + * Revision 1.5 2000/07/14 21:35:35 gerd + * Updated because of the simplification of Pxp_types.collect_warnings. + * + * Revision 1.4 2000/07/08 17:40:50 gerd + * Updated the simulation. + * + * Revision 1.3 2000/06/14 22:19:27 gerd + * Update because of additional 'encoding' methods. + * + * Revision 1.2 2000/05/30 00:08:40 gerd + * Bugfix. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + *) + diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli new file mode 100644 index 000000000..2e37f0f22 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_document.mli @@ -0,0 +1,420 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * Markup! The validating XML parser for Objective Caml. + * Copyright 1999 by Gerd Stolpmann. See LICENSE for details. + * + * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_document.mli. + * It corresponds to revision 1.13 of markup_document.mli. + *) + +(**********************************************************************) +(* *) +(* Markup_document: *) +(* Object model of the document/element instances *) +(* *) +(**********************************************************************) + + +(* ====================================================================== + * OVERVIEW + * + * class type node ............. The common class type of the nodes of + * the element tree. Nodes are either + * elements (inner nodes) or data nodes + * (leaves) + * class type extension ........ 
The minimal properties of the so-called + * extensions of the nodes: Nodes can be + * customized by applying a class parameter + * that adds methods/values to nodes. + * class data_impl : node ...... Implements data nodes. + * class element_impl : node ... Implements element nodes + * class document .............. A document is an element with some additional + * properties + * + * ====================================================================== + * + * THE STRUCTURE OF NODE TREES: + * + * Every node except the root node has a parent node. The parent node is + * always an element, because data nodes never contain other nodes. + * In the other direction, element nodes may have children; both elements + * and data nodes are possible as children. + * Every node knows its parent (if any) and all its children (if any); + * the linkage is maintained in both directions. A node without a parent + * is called a root. + * It is not possible that a node is the child of two nodes (two different nodes + * or a multiple child of the same node). + * You can break the connection between a node and its parent; the method + * "delete" performs this operation and deletes the node from the parent's + * list of children. The node is now a root, for itself and for all + * subordinate nodes. In this context, the node is also called an orphan, + * because it has lost its parent (this is a bit misleading because the + * parent is not always the creator of a node). + * In order to simplify complex operations, you can also set the list of + * children of an element. Nodes that have been children before are unchanged; + * new nodes are added (and the linkage is set up), nodes no longer occurring + * in the list are handled as if they had been deleted. + * If you try to add a node that is not a root (either by an "add" or by a + * "set" operation) the operation fails. + * + * CREATION OF NODES + * + * The class interface supports creation of nodes by cloning a so-called + * exemplar. 
The idea is that it is sometimes useful to implement different + * element types by different classes, and to implement this by looking up + * exemplars. + * Imagine you have three element types A, B, and C, and three classes + * a, b, and c implementing the node interface (for example, by providing + * different extensions, see below). The XML parser can be configured to + * have a lookup table + * { A --> a0, B --> b0, C --> c0 } + * where a0, b0, c0 are exemplars of the classes a, b, and c, i.e. empty + * objects belonging to these classes. If the parser finds an instance of + * A, it looks up the exemplar a0 of A and clones it (actually, the method + * "create_element" performs this for elements, and "create_data" for data + * nodes). Clones belong to the same class as the original nodes, so the + * instances of the elements have the same classes as the configured + * exemplars. + * Note: This technique assumes that the interface of all exemplars is the + * same! + * + * THE EXTENSION + * + * The class type node and all its implementations have a class parameter + * 'ext which must at least fulfil the properties of the class type "extension". + * The idea is that you can add properties, for example: + * + * class my_extension = + * object + * (* minimal properties required by class type "extension": *) + * method clone = ... + * method node = ... + * method set_node n = ... + * (* here my own methods: *) + * method do_this_and_that ... + * end + * + * class my_element_impl = [ my_extension ] element_impl + * class my_data_impl = [ my_extension ] data_impl + * + * The whole XML parser is parameterized with 'ext, so your extension is + * visible everywhere (this is the reason why extensibility is solved by + * parametric polymorphism and not by inclusive polymorphism (subtyping)). 
+ * + * + * SOME COMPLICATED TYPE EXPRESSIONS + * + * Sometimes the following type expressions turn out to be necessary: + * + * 'a node extension as 'a + * This is the type of an extension that belongs to a node that + * has an extension that is the same as we started with. + * + * 'a extension node as 'a + * This is the type of a node that has an extension that belongs to a + * node of the type we started with. + * + * + * DOCUMENTS + * ... + * + * ====================================================================== + * + * SIMPLE USAGE: ... + *) + + +open Markup_dtd + + +type node_type = + T_element of string + | T_data + + + +class type [ 'node ] extension = + object ('self) + method clone : 'self + (* "clone" should return an exact deep copy of the object. *) + method node : 'node + (* "node" returns the corresponding node of this extension. This method + * is intended to return exactly what previously has been set by "set_node". + *) + method set_node : 'node -> unit + (* "set_node" is invoked once the extension is associated to a new + * node object. + *) + end +;; + +class type [ 'ext, 'node ] pxp_extension_type = +object ('self) + method clone : 'self + method node : 'self Pxp_document.node + method set_node : 'self Pxp_document.node -> unit + + method markup_node : 'node + method set_markup_node : 'node -> unit + + method set_index : 'self Pxp_yacc.index -> unit + method index : 'self Pxp_yacc.index + end +;; + +class type [ 'ext ] node = + object ('self) + constraint 'ext = 'ext node #extension + method pxp_node : (('ext, 'ext node) pxp_extension_type) Pxp_document.node + + method extension : 'ext + (* Return the extension of this node: *) + + method delete : unit + (* Delete this node from the parent's list of sub nodes. This node gets + * orphaned. + * 'delete' does nothing if this node does not have a parent. + *) + + method parent : 'ext node + (* Get the parent, or raise Not_found if this node is an orphan.
*) + + method root : 'ext node + (* Get the direct or indirect parent that does not have a parent itself, + * i.e. the root of the tree. + *) + + method orphaned_clone : 'ext node + (* return an exact clone of this element and all sub nodes (deep copy) + * except string values which are shared by this node and the clone. + * The other exception is that the clone has no parent (i.e. it is now + * a root). + *) + + method orphaned_flat_clone : 'ext node + (* return a clone of this element where all subnodes are omitted. + * The type of the node, and the attributes are the same as in the + * original node. + * The clone has no parent. + *) + + method add_node : 'ext node -> unit + (* Append new sub nodes -- mainly used by the parser itself, but + * of course open for everybody. If an element is added, it must be + * an orphan (i.e. does not have a parent node); and after addition + * *this* node is the new parent. + *) + + method add_pinstr : proc_instruction -> unit + (* Add a processing instruction to the set of processing instructions of + * this node. Usually only elements contain processing instructions. + *) + + method pinstr : string -> proc_instruction list + (* Get all processing instructions with the passed name *) + + method pinstr_names : string list + (* Get a list of all names of processing instructions *) + + method sub_nodes : 'ext node list + (* Get the list of sub nodes *) + + method iter_nodes : ('ext node -> unit) -> unit + (* iterate over the sub nodes *) + + method iter_nodes_sibl : + ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit + (* Here every iteration step can also access to the previous and to the + * following node if present: + *) + + method find : string -> 'ext node + (* Get the node that has an ID attribute with this value, or raise + * Not_found. + * "find" may also cause a Validation_error if something is wrong + * with the IDs. 
+ *) + + method reset_finder : unit + (* makes that newly added nodes will also be found *) + + method set_nodes : 'ext node list -> unit + (* Set the list of sub nodes. Elements that are no longer sub nodes get + * orphaned, and all new elements that previously were not sub nodes + * must have been orphaned. + *) + + method data : string + (* Get the data string of this node. For data nodes, this string is just + * the content. For elements, this string is the concatenation of all + * subordinate data nodes. + *) + + method node_type : node_type + (* Get the name of the element type. *) + + method attribute : string -> Markup_types.att_value + method attribute_names : string list + method attribute_type : string -> Markup_types.att_type + method attributes : (string * Markup_types.att_value) list + (* Get a specific attribute; get the names of all attributes; get the + * type of a specific attribute; get names and values of all attributes. + * Only elements have attributes. + * Note: If the DTD allows arbitrary attributes for this element, "attribute_type" + * raises Undeclared. + *) + + method required_string_attribute : string -> string + method required_list_attribute : string -> string list + (* Return the attribute or fail if the attribute is not present: + * The first version passes the value always as string back; + * the second version always as list. + *) + + method optional_string_attribute : string -> string option + method optional_list_attribute : string -> string list + (* Return some attribute value or return None if the attribute is not + * present: + * The first version passes the value always as string back; + * the second version always as list. + *) + + method quick_set_attributes : (string * Markup_types.att_value) list -> unit + (* Sets the attributes but does not check whether they match the DTD.
+ *) + + method dtd : dtd + (* Get the DTD *) + + method create_element : dtd -> node_type -> (string * string) list -> 'ext node + (* create an "empty copy" of this element: + * - new DTD + * - new node type + * - new attribute list + * - empty list of nodes + *) + + method create_data : dtd -> string -> 'ext node + (* create an "empty copy" of this data node: *) + + method local_validate : unit + (* Check that this element conforms to the DTD: *) + + method keep_always_whitespace_mode : unit + (* Normally, add_node does not accept data nodes when the DTD does not + * allow data nodes or only whitespace ("ignorable whitespace"). + * Once you have invoked this method, ignorable whitespace is forced + * to be included into the document. + *) + + method write_compact_as_latin1 : Markup_types.output_stream -> unit + (* Write the contents of this node and the subtrees to the passed + * output stream; the character set ISO-8859-1 is used. The format + * is compact (the opposite of "pretty printing"). + *) + + (* ---------------------------------------- *) + (* internal methods: *) + method internal_adopt : 'ext node option -> unit + method internal_delete : 'ext node -> unit + method internal_init : dtd -> string -> (string * string) list -> unit + end +;; + +class [ 'ext ] data_impl : 'ext -> string -> [ 'ext ] node + +class [ 'ext ] element_impl : 'ext -> [ 'ext ] node + +class [ 'ext ] document : + Markup_types.collect_warnings -> + object + method init_xml_version : string -> unit + method init_xml_standalone : bool -> unit + method init_root : 'ext node -> unit + + method xml_version : string + method xml_standalone : bool + method dtd : dtd + method root : 'ext node + + method add_pinstr : proc_instruction -> unit + method pinstr : string -> proc_instruction list + method pinstr_names : string list + + method write_compact_as_latin1 : Markup_types.output_stream -> unit + (* Write the document to the passed + * output stream; the character set ISO-8859-1 is used. 
The format + * is compact (the opposite of "pretty printing"). + * If a DTD is present, the DTD is included into the internal subset. + *) + + end +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.4 2000/08/18 20:19:16 gerd + * Updates in the emulation because of PXP changes. + * + * Revision 1.3 2000/07/16 16:35:06 gerd + * Update because PXP interface contains now the method 'write'. + * + * Revision 1.2 2000/06/14 22:19:27 gerd + * Update because of additional 'encoding' methods. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + * ====================================================================== + * OLD LOGS: + * + * Revision 1.13 2000/05/27 19:15:08 gerd + * Removed the method init_xml_standalone. + * + * Revision 1.12 2000/05/01 20:42:34 gerd + * New method write_compact_as_latin1. + * + * Revision 1.11 2000/04/30 18:15:57 gerd + * Beautifications. + * New method keep_always_whitespace_mode. + * + * Revision 1.10 2000/03/11 22:58:15 gerd + * Updated to support Markup_codewriter. + * + * Revision 1.9 2000/01/27 21:51:56 gerd + * Added method 'attributes'. + * + * Revision 1.8 2000/01/27 21:19:07 gerd + * Added further methods. + * + * Revision 1.7 1999/11/09 22:20:14 gerd + * Removed method init_dtd from class "document". The DTD is + * implicitly passed to the document by the root element. + * + * Revision 1.6 1999/09/01 22:51:40 gerd + * Added methods to store processing instructions. + * + * Revision 1.5 1999/09/01 16:19:57 gerd + * The "document" class has now a "warner" as class argument. + * + * Revision 1.4 1999/08/19 21:59:13 gerd + * Added method "reset_finder". + * + * Revision 1.3 1999/08/19 01:08:29 gerd + * Added method "find". 
+ * + * Revision 1.2 1999/08/15 02:19:41 gerd + * Some new explanations: That unknown elements are not rejected + * if the DTD allows them. + * + * Revision 1.1 1999/08/10 00:35:51 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml new file mode 100644 index 000000000..7df5e29c6 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.ml @@ -0,0 +1,36 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +class dtd w = + Pxp_dtd.dtd + (w : Markup_types.collect_warnings :> Pxp_types.collect_warnings) + `Enc_iso88591;; + +class dtd_element dtd name = + Pxp_dtd.dtd_element dtd name;; + +class dtd_notation name id = + Pxp_dtd.dtd_notation name id `Enc_iso88591;; + +class proc_instruction target value = + Pxp_dtd.proc_instruction target value `Enc_iso88591;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.3 2000/07/14 21:35:35 gerd + * Updated because of the simplification of Pxp_types.collect_warnings. + * + * Revision 1.2 2000/06/14 22:19:27 gerd + * Update because of additional 'encoding' methods. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli new file mode 100644 index 000000000..660b35ae8 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_dtd.mli @@ -0,0 +1,108 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * Markup! The validating XML parser for Objective Caml. + * Copyright 1999 by Gerd Stolpmann. See LICENSE for details. + * + * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_dtd.mli. + * It corresponds to revision 1.11 of markup_dtd.mli. 
+ *) + +(**********************************************************************) +(* *) +(* Markup_dtd: *) +(* Object model of document type declarations *) +(* *) +(**********************************************************************) + +(* ====================================================================== + * OVERVIEW + * + * class dtd ............... represents the whole DTD, including element + * declarations, entity declarations, notation + * declarations, and processing instructions + * class dtd_element ....... represents an element declaration consisting + * of a content model and an attribute list + * declaration + * class dtd_notation ...... represents a notation declaration + * class proc_instruction .. represents a processing instruction + * ====================================================================== + * + *) + + +class dtd : + Markup_types.collect_warnings -> + Pxp_dtd.dtd + (* Incompatibilities: + * add_gen_entity, gen_entity + *) + +class dtd_element : dtd -> string -> Pxp_dtd.dtd_element + (* Incompatibilities: + * set_content_model, add_attribute + *) + +class dtd_notation : string -> Markup_types.ext_id -> Pxp_dtd.dtd_notation + +class proc_instruction : string -> string -> Pxp_dtd.proc_instruction + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + * ====================================================================== + * OLD LOGS: + * + * Revision 1.11 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.10 2000/05/27 19:20:38 gerd + * Changed the interfaces for the standalone check: New + * methods: standalone_declaration, set_standalone_declaration, + * externally_declared, attribute_violates_standalone_declaration. 
+ * The method set_content_model has been renamed to + * set_cm_and_extdecl; it now initializes also whether the element + * has been declared in an external entity. + * Methods add_gen_entity and gen_entity pass an additional + * boolean argument containing whether the declaration of the + * general entity happened in an external entity. + * Method add_attribute expects this argument, too, which + * states whether the declaration of the attribute happened in an + * external entity. + * + * Revision 1.9 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.8 2000/05/06 23:10:26 gerd + * allow_arbitrary for elements, too. + * + * Revision 1.7 2000/05/01 20:42:52 gerd + * New method write_compact_as_latin1. + * + * Revision 1.6 2000/03/11 22:58:15 gerd + * Updated to support Markup_codewriter. + * + * Revision 1.5 2000/02/22 02:32:02 gerd + * Updated. + * + * Revision 1.4 1999/11/09 22:15:41 gerd + * Added method "arbitrary_allowed". + * + * Revision 1.3 1999/09/01 16:21:56 gerd + * "dtd" classes have now an argument that passes a "warner". + * + * Revision 1.2 1999/08/15 02:20:23 gerd + * New feature: a DTD can allow arbitrary elements. + * + * Revision 1.1 1999/08/10 00:35:51 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml new file mode 100644 index 000000000..a196c2219 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.ml @@ -0,0 +1,119 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +open Markup_types;; + +class type resolver = + object + method open_in : ext_id -> Lexing.lexbuf + method close_in : unit + method change_encoding : string -> unit + method clone : resolver + end +;; + +(* General note: close_in is simulated by close_all. 
Of course, this is + * wrong, but it should not matter + *) + + +class resolve_read_channel ch the_warner = + object (self) + val pxp_resolver = + new Pxp_reader.resolve_read_this_channel + ~auto_close:false + ch + val warner = the_warner + + initializer + pxp_resolver # init_warner + (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings); + pxp_resolver # init_rep_encoding `Enc_iso88591; + + method open_in xid = + pxp_resolver # open_in xid + + method close_in = + pxp_resolver # close_all (* sic! *) + + method change_encoding enc = + pxp_resolver # change_encoding enc + + method clone = + ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver ) + + end +;; + + +class resolve_read_string str = + object (self) + val pxp_resolver = + new Pxp_reader.resolve_read_this_string str + val warner = new Pxp_types.drop_warnings + + initializer + pxp_resolver # init_warner warner; + pxp_resolver # init_rep_encoding `Enc_iso88591; + + method open_in xid = + pxp_resolver # open_in xid + + method close_in = + pxp_resolver # close_all (* sic! *) + + method change_encoding enc = + pxp_resolver # change_encoding enc + + method clone = + ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver ) + end +;; + + +class resolve_as_file the_warner = + object (self) + val pxp_resolver = + new Pxp_reader.resolve_as_file + ~system_encoding:`Enc_iso88591 + () + val warner = the_warner + + initializer + pxp_resolver # init_warner + (warner : Markup_types.collect_warnings :> Pxp_types.collect_warnings); + pxp_resolver # init_rep_encoding `Enc_iso88591; + + method open_in xid = + pxp_resolver # open_in xid + + method close_in = + pxp_resolver # close_all (* sic! 
*) + + method change_encoding enc = + pxp_resolver # change_encoding enc + + method clone = + ( {< pxp_resolver = pxp_resolver # clone >} : #resolver :> resolver ) + end +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.3 2000/07/14 21:35:35 gerd + * Updated because of the simplification of Pxp_types.collect_warnings. + * + * Revision 1.2 2000/07/08 17:40:50 gerd + * Updated the simulation. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli new file mode 100644 index 000000000..8e5e2c8fc --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_reader.mli @@ -0,0 +1,141 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * Markup! The validating XML parser for Objective Caml. + * Copyright by Gerd Stolpmann. See LICENSE for details. + * + * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_reader.mli. + * It corresponds to revision 1.3 of markup_reader.mli. + *) + +open Markup_types;; + + +(* The class type resolver is the official type of all "resolvers". + * Resolvers get file names (or better, external identifiers) and + * return lexbufs, scanning the file for tokens. Resolvers may be + * cloned, and clones can interpret relative file names relative to + * their creator. + *) + +class type resolver = + object + (* A resolver can open a character source, and returns this source as + * Lexing.lexbuf. + * The resolver should recode the source into ISO-8859-1. By default, + * a resolver should assume UTF-8 or UTF-16 encoding. Before + * 'change_encoding' is invoked, the resolver should only return + * lexbufs with one character. After 'change_encoding' has been invoked, + * there is no character limit anymore. 
+ * 'change_encoding' can only be invoked once. This method is usually + * called after the prolog of the entity has been read. + * If this method is not called, it is up to the resolver to find out + * if UTF-8 or UTF-16 is used. It is recommended to invoke this method + * with an empty string to indicate this situation. + *) + method open_in : ext_id -> Lexing.lexbuf + method close_in : unit + method change_encoding : string -> unit + + + (* Every resolver can be cloned. The clone does not inherit the connection + * with the external object, i.e. it is closed. + *) + method clone : resolver + + end +;; + + +(* The following class is the current main implementation of resolvers. + * It fetches strings from an arbitrary source (by calling init_in, and + * then repeatedly next_string), recodes them to ISO-8859-1, and creates + * lexbufs for them. + * It is not complete, as the source is missing. + * + * Note that 'resolve_general' may change in future revisions; it is ugly. + *) + +(* -- This API simulation does not provide 'resolve_general' any longer + +class virtual resolve_general : + collect_warnings -> + object + val mutable encoding : string + val mutable encoding_requested : bool + val warner : collect_warnings + + method clone : resolver + + method private warn : int -> unit + method private autodetect : string -> unit + + method private virtual next_string : string -> int -> int -> int + method private virtual init_in : ext_id -> unit + method virtual close_in : unit + + method open_in : ext_id -> Lexing.lexbuf + + method change_encoding : string -> unit + end +*) + + +(* The next classes are resolvers for concrete input sources. *) + +class resolve_read_channel : + in_channel -> collect_warnings -> resolver;; + + (* Reads from the passed channel (it may be even a pipe). Note that this + * resolver cannot handle file inclusions, as it is pre-bound to a + * specific channel and is not able to interpret file names. 
+ * That means, if there is an entity reference (something like &name; or + * %name;) to parse, and the definition points to another file, the + * resolver will fail. + *) + + +class resolve_read_string : + string -> resolver;; + + (* Reads from the passed string. As 'resolve_read_channel', this + * resolver cannot handle file inclusions. + *) + + +class resolve_as_file : + collect_warnings -> resolver;; + + (* Reads from the local file system. Every file name is interpreted as + * file name of the local file system, and the referred file is read. + * This resolver can handle file inclusions as long as they do not + * exceed the scope of the local file system (i.e. no URLs). + *) + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.2 2000/07/08 17:40:50 gerd + * Updated the simulation. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + * ====================================================================== + * OLD LOGS: + * + * Revision 1.3 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.2 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.1 2000/03/13 23:41:54 gerd + * Initial revision.
+ * + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml new file mode 100644 index 000000000..a0c0c271b --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_types.ml @@ -0,0 +1,103 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + + +type ext_id = Pxp_types.ext_id = + System of string + | Public of (string * string) + | Anonymous +type dtd_id = Pxp_types.dtd_id= + External of ext_id + | Derived of ext_id + | Internal +type content_model_type = Pxp_types.content_model_type = + Unspecified + | Empty + | Any + | Mixed of mixed_spec list + | Regexp of regexp_spec +and mixed_spec = Pxp_types.mixed_spec = + MPCDATA + | MChild of string +and regexp_spec = Pxp_types.regexp_spec = + Optional of regexp_spec + | Repeated of regexp_spec + | Repeated1 of regexp_spec + | Alt of regexp_spec list + | Seq of regexp_spec list + | Child of string +type att_type = Pxp_types.att_type = + A_cdata + | A_id + | A_idref + | A_idrefs + | A_entity + | A_entities + | A_nmtoken + | A_nmtokens + | A_notation of string list + | A_enum of string list +type att_default = Pxp_types.att_default = + D_required + | D_implied + | D_default of string + | D_fixed of string +type att_value = Pxp_types.att_value = + Value of string + | Valuelist of string list + | Implied_value + +class collect_warnings = +object + val mutable w = Buffer.create 100 + method print_warnings = + Buffer.contents w + method reset = + Buffer.clear w + method warn s = + Buffer.add_string w ("WARNING: " ^ s ^ "\n") +end + +exception Illegal_character of int +exception Validation_error = Pxp_types.Validation_error +exception WF_error = Pxp_types.WF_error +exception Character_not_supported = Pxp_types.Character_not_supported +exception Bad_character_stream = Netconversion.Malformed_code +exception At = Pxp_types.At +exception Undeclared = Pxp_types.Undeclared + +let string_of_exn = Pxp_types.string_of_exn + +type 
output_stream = Pxp_types.output_stream = + Out_buffer of Buffer.t + | Out_channel of out_channel + | Out_function of (string -> int -> int -> unit) + +let write = Pxp_types.write + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.5 2000/08/18 20:19:16 gerd + * Updates in the emulation because of PXP changes. + * + * Revision 1.4 2000/07/16 18:30:15 gerd + * Updated because PXP does no longer have the exception + * Illegal_character. + * + * Revision 1.3 2000/07/14 21:35:35 gerd + * Updated because of the simplification of Pxp_types.collect_warnings. + * + * Revision 1.2 2000/07/08 17:40:50 gerd + * Updated the simulation. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli new file mode 100644 index 000000000..b33bb30b2 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_types.mli @@ -0,0 +1,125 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * Markup! The validating XML parser for Objective Caml. + * Copyright 1999 by Gerd Stolpmann. See LICENSE for details. + * + * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_types.mli. + * It corresponds to revision 1.7 of markup_types.mli. 
+ *) + + +type ext_id = Pxp_types.ext_id = + System of string + | Public of (string * string) + | Anonymous +type dtd_id = Pxp_types.dtd_id = + External of ext_id + | Derived of ext_id + | Internal +type content_model_type = Pxp_types.content_model_type = + Unspecified + | Empty + | Any + | Mixed of mixed_spec list + | Regexp of regexp_spec +and mixed_spec = Pxp_types.mixed_spec = + MPCDATA + | MChild of string +and regexp_spec = Pxp_types.regexp_spec = + Optional of regexp_spec + | Repeated of regexp_spec + | Repeated1 of regexp_spec + | Alt of regexp_spec list + | Seq of regexp_spec list + | Child of string +type att_type = Pxp_types.att_type = + A_cdata + | A_id + | A_idref + | A_idrefs + | A_entity + | A_entities + | A_nmtoken + | A_nmtokens + | A_notation of string list + | A_enum of string list +type att_default = Pxp_types.att_default = + D_required + | D_implied + | D_default of string + | D_fixed of string +type att_value = Pxp_types.att_value = + Value of string + | Valuelist of string list + | Implied_value + +class collect_warnings : + object + method warn : string -> unit + method print_warnings : string + method reset : unit + end +;; + + +exception Illegal_character of int +exception Validation_error of string +exception WF_error of string +exception Character_not_supported +exception Bad_character_stream +exception At of (string * exn) +exception Undeclared + +val string_of_exn : exn -> string + (* Converts a Markup exception into a readable string *) + + +type output_stream = Pxp_types.output_stream = + Out_buffer of Buffer.t + | Out_channel of out_channel + | Out_function of (string -> int -> int -> unit) + +val write : output_stream -> string -> int -> int -> unit + (* write os s pos len: Writes the string to the buffer/channel/stream *) + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.2 2000/07/08 17:40:50 
gerd + * Updated the simulation. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + * ====================================================================== + * OLD LOGS: + * + * Revision 1.7 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.6 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.5 2000/05/01 20:43:25 gerd + * New type output_stream; new function 'write'. + * + * Revision 1.4 1999/09/01 16:25:35 gerd + * Dropped Illegal_token and Content_not_allowed_here. WF_error can + * be used instead. + * + * Revision 1.3 1999/08/15 02:22:40 gerd + * Added exception Undeclared. + * + * Revision 1.2 1999/08/14 22:15:17 gerd + * New class "collect_warnings". + * + * Revision 1.1 1999/08/10 00:35:52 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml new file mode 100644 index 000000000..26c40de18 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.ml @@ -0,0 +1,245 @@ +(* $Id$ + * ---------------------------------------------------------------------- + *) + +open Markup_types +open Markup_dtd +open Markup_document + +type config = + { warner : collect_warnings; + errors_with_line_numbers : bool; + processing_instructions_inline : bool; + virtual_root : bool; + debugging_mode : bool; + } + + +type source = + Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver) + | Channel of in_channel + | File of string + | Latin1 of string + | ExtID of (ext_id * Markup_reader.resolver) + +type 'ext domspec = + { map : (node_type, 'ext node) Hashtbl.t; + default_element : 'ext node; + } + + +class default_ext = + object(self) + val mutable node = (None : ('a extension node as 'a) option) + method clone = {< >} + method node = + match node with + None -> + assert false + | Some n -> n + method 
set_node n = + node <- Some n + end +;; + + +let default_extension = new default_ext;; + +let default_config = + { warner = new collect_warnings; + errors_with_line_numbers = true; + processing_instructions_inline = false; + virtual_root = false; + debugging_mode = false; + } + + +let default_dom = + let d = Hashtbl.create 2 in + Hashtbl.add d T_data (new data_impl default_extension ""); + { map = d; + default_element = new element_impl default_extension + } +;; + + +let pxp_config cfg = + { Pxp_yacc.default_config with + Pxp_yacc.warner = (cfg.warner :> Pxp_types.collect_warnings); + Pxp_yacc.errors_with_line_numbers = cfg.errors_with_line_numbers; + Pxp_yacc.enable_pinstr_nodes = cfg.processing_instructions_inline; + Pxp_yacc.enable_super_root_node = cfg.virtual_root; + Pxp_yacc.encoding = `Enc_iso88591; + Pxp_yacc.recognize_standalone_declaration = false; + Pxp_yacc.debugging_mode = cfg.debugging_mode; + } +;; + + +class pxp_resolver r = + object (self) + val markup_resolver = r + + method init_rep_encoding enc = + assert (enc = `Enc_iso88591 ) + + method init_warner w = + () + + method rep_encoding = `Enc_iso88591 + + method open_in xid = + markup_resolver # open_in xid + + method close_in = + markup_resolver # close_in + + method close_all = + markup_resolver # close_in + + method change_encoding enc = + markup_resolver # change_encoding enc + + method clone = + ( {< markup_resolver = markup_resolver # clone >} + : #Pxp_reader.resolver :> Pxp_reader.resolver ) + end +;; + + +let pxp_source src = + match src with + Entity (mkent, res) -> Pxp_yacc.Entity(mkent, new pxp_resolver res) + | ExtID (id, res) -> Pxp_yacc.ExtID(id, new pxp_resolver res) + | Channel ch -> Pxp_yacc.from_channel + ~system_encoding:`Enc_iso88591 ch + | File f -> Pxp_yacc.from_file + ~system_encoding:`Enc_iso88591 f + | Latin1 s -> Pxp_yacc.from_string ~fixenc:`Enc_iso88591 s +;; + + +let pxp_dom dom = + let dex = + try Hashtbl.find dom.map T_data + with Not_found -> assert false + in + let 
eex = dom.default_element in + let m = Hashtbl.create 100 in + Hashtbl.iter + (fun nt ex -> + match nt with + T_element name when name <> "-vr" && name <> "-pi" -> + let pxp_ex = ex # pxp_node in + Hashtbl.add m name pxp_ex + | _ -> () + ) + dom.map; + let srex = + try + Some ((Hashtbl.find dom.map (T_element "-vr")) # pxp_node) + with + Not_found -> None + in + let piex = + try + Some ((Hashtbl.find dom.map (T_element "-pi")) # pxp_node) + with + Not_found -> None + in + Pxp_document.make_spec_from_mapping + ?super_root_exemplar:srex + ?default_pinstr_exemplar:piex + ~data_exemplar:(dex # pxp_node) + ~default_element_exemplar:(eex # pxp_node) + ~element_mapping:m + () +;; + + +let markup_document w index doc = + let mdoc = new document w in + mdoc # init_xml_version (doc # xml_version); + mdoc # init_xml_standalone (doc # xml_standalone); + let r = doc # root # extension in + r # set_index index; + mdoc # init_root (r # markup_node); + List.iter + (fun piname -> + let l = doc # pinstr piname in + List.iter + (fun pi -> mdoc # add_pinstr pi) + l) + (doc # pinstr_names); + mdoc +;; + + + +let parse_dtd_entity cfg src = + Pxp_yacc.parse_dtd_entity + (pxp_config cfg) + (pxp_source src) +;; + + +let parse_document_entity cfg src dom = + let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in + markup_document + cfg.warner + index + (Pxp_yacc.parse_document_entity + ~id_index:index + (pxp_config cfg) + (pxp_source src) + (pxp_dom dom)) +;; + + +let parse_content_entity cfg src dtd dom = + let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in + let n = + (Pxp_yacc.parse_content_entity + ~id_index:index + (pxp_config cfg) + (pxp_source src) + dtd + (pxp_dom dom)) # extension in + n # set_index index; + n # markup_node +;; + + +let parse_wf_entity cfg src dom = + let index = (new Pxp_yacc.hash_index :> 'ext Pxp_yacc.index) in + (* Restriction: index is not filled! 
*) + markup_document + cfg.warner + index + (Pxp_yacc.parse_wfdocument_entity + (pxp_config cfg) + (pxp_source src) + (pxp_dom dom)) +;; + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.4 2000/08/18 20:19:16 gerd + * Updates in the emulation because of PXP changes. + * + * Revision 1.3 2000/07/14 21:35:35 gerd + * Updated because of the simplification of Pxp_types.collect_warnings. + * + * Revision 1.2 2000/07/08 17:40:50 gerd + * Updated the simulation. + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + *) diff --git a/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli new file mode 100644 index 000000000..daccad4c7 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/compatibility/markup_yacc.mli @@ -0,0 +1,233 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * Markup! The validating XML parser for Objective Caml. + * Copyright 1999 by Gerd Stolpmann. See LICENSE for details. + * + * THIS IS THE markup-0.2.10 COMPATIBLE INTERFACE TO markup_yacc.mli. + * It corresponds to revision 1.4 of markup_yacc.mli. + *) + + +(*$ markup-yacc.mli *) + +open Markup_types +open Markup_dtd +open Markup_document + +type config = + { warner : collect_warnings; + (* An object that collects warnings. *) + + errors_with_line_numbers : bool; + (* Whether error messages contain line numbers or not. The parser + * is 10 to 20 per cent faster if line numbers are turned off; + * you get only character positions in this case. + *) + + processing_instructions_inline : bool; + (* true: turns a special mode for processing instructions on. Normally, + * you cannot determine the exact location of a PI; you only know + * in which element the PI occurs. 
The "inline" mode makes it possible + * to find the exact location out: Every PI is artificially wrapped + * by a special element with name "-pi". For example, if the XML text + * is , the parser normally produces only an element + * object for "a", and puts the PIs "x" and "y" into it (without + * order). In inline mode, the object "a" will contain two objects + * with name "-pi", and the first object will contain "x", and the + * second "y". + * Notes: + * (1) The name "-pi" is reserved. You cannot use it for your own + * tags because tag names must not begin with '-'. + * (2) You need not to add a declaration for "-pi" to the DTD. These + * elements are handled separately. + * (3) Of course, the "-pi" objects are created from exemplars of + * your DOM map. + *) + + virtual_root : bool; + (* true: the topmost element of the XML tree is not the root element, + * but the so-called virtual root. The root element is a son of the + * virtual root. The virtual root is an ordinary element with name + * "-vr". + * The following behaviour changes, too: + * - PIs occurring outside the root element and outside the DTD are + * added to the virtual root instead of the document object + * - If processing_instructions_inline is also turned on, these PIs + * are added inline to the virtual root + * Notes: + * (1) The name "-vr" is reserved. You cannot use it for your own + * tags because tag names must not begin with '-'. + * (2) You need not to add a declaration for "-vr" to the DTD. These + * elements are handled separately. + * (3) Of course, the "-vr" objects are created from exemplars of + * your DOM map. + *) + + (* The following options are not implemented, or only for internal + * use. 
+ *) + + debugging_mode : bool; + } + + +type source = + Entity of ((dtd -> Pxp_entity.entity) * Markup_reader.resolver) + | Channel of in_channel + | File of string + | Latin1 of string + | ExtID of (ext_id * Markup_reader.resolver) + +(* Note on sources: + * + * The sources do not have all the same capabilities. Here the differences: + * + * - File: A File source reads from a file by name. This has the advantage + * that references to external entites can be resolved. - The problem + * with SYSTEM references is that they usually contain relative file + * names; more exactly, a file name relative to the document containing it. + * It is only possible to convert such names to absolute file names if the + * name of the document containing such references is known; and File + * denotes this name. + * + * - Channel, Latin1: These sources read from documents given as channels or + * (Latin 1-encoded) strings. There is no file name, and because of this + * the documents must not contain references to external files (even + * if the file names are given as absolute names). + * + * - ExtID(x,r): The identifier x (either the SYSTEM or the PUBLIC name) of the + * entity to read from is passed to the resolver r as-is. + * The intention of this option is to allow customized + * resolvers to interpret external identifiers without any restriction. + * For example, you can assign the PUBLIC identifiers a meaning (they + * currently do not have any), or you can extend the "namespace" of + * identifiers. + * ExtID is the interface of choice for own extensions to resolvers. + * + * - Entity(m,r): You can implementy every behaviour by using a customized + * entity class. Once the DTD object d is known that will be used during + * parsing, the entity e = m d is determined and used together with the + * resolver r. + * This is only for hackers. 
+ *) + + +type 'ext domspec = + { map : (node_type, 'ext node) Hashtbl.t; + default_element : 'ext node; + } + (* Specifies which node to use as exemplar for which node type. See the + * manual for explanations. + *) + +val default_config : config + (* - The resolver is able to read from files by name + * - Warnings are thrown away + * - Error message will contain line numbers + * - The internal encoding is ISO-8859-1 + * - standalone declaration is checked + *) + +val default_extension : ('a node extension) as 'a + (* A "null" extension; an extension that does not extend the funtionality *) + +val default_dom : ('a node extension as 'a) domspec + (* Specifies that you do not want to use extensions. *) + +val parse_dtd_entity : config -> source -> dtd + (* Parse an entity containing a DTD, and return this DTD. *) + +val parse_document_entity : config -> source -> 'ext domspec -> 'ext document + (* Parse a closed document, i.e. a document beginning with , + * and validate the contents of the document against the DTD contained + * and/or referenced in the document. + *) + +val parse_content_entity : config -> + source -> + dtd -> + 'ext domspec -> + 'ext node + (* Parse a file representing a well-formed fragment of a document. The + * fragment must be a single element (i.e. something like ...; + * not a sequence like ......). The element is validated + * against the passed DTD, but it is not checked whether the element is + * the root element specified in the DTD. + * Note that you can create DTDs that specify not to validate at all + * (invoke method allow_arbitrary on the DTD). + *) + +val parse_wf_entity : config -> source -> 'ext domspec -> 'ext document + (* Parse a closed document (see parse_document_entity), but do not + * validate it. Only checks on well-formedness are performed. 
+ *) + +(*$-*) + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:30 lpadovan + * Initial revision + * + * Revision 1.1 2000/05/29 23:43:51 gerd + * Initial compatibility revision. + * + * ====================================================================== + * OLD LOGS: + * + * Revision 1.4 2000/05/29 21:14:57 gerd + * Changed the type 'encoding' into a polymorphic variant. + * + * Revision 1.3 2000/05/27 19:24:01 gerd + * New option: recognize_standalone_declaration. + * + * Revision 1.2 2000/05/20 20:31:40 gerd + * Big change: Added support for various encodings of the + * internal representation. + * + * Revision 1.1 2000/05/06 23:21:49 gerd + * Initial revision. + * + * Revision 1.9 2000/04/30 18:23:38 gerd + * New config options 'processing_instructions_inline' and + * 'virtual_root'. + * + * Revision 1.8 2000/03/13 23:46:46 gerd + * Change: The 'resolver' component of the 'config' type has + * disappeared. Instead, there is a new resolver component in the Entity + * and ExtID values of 'source'. I hope that this makes clearer that the + * resolver has only an effect if used together with Entity and ExtID + * sources. + * Change: The Entity value can now return the entity dependent + * on the DTD that is going to be used. + * + * Revision 1.7 2000/02/22 02:32:02 gerd + * Updated. + * + * Revision 1.6 2000/02/22 01:52:45 gerd + * Added documentation. + * + * Revision 1.5 2000/01/20 20:54:43 gerd + * New config.errors_with_line_numbers. + * + * Revision 1.4 1999/09/01 23:09:10 gerd + * New function parse_wf_entity that simulates a well-formedness + * parser. + * + * Revision 1.3 1999/09/01 16:26:36 gerd + * Added an empty line. This is *really* a big change. + * + * Revision 1.2 1999/08/14 22:20:27 gerd + * The "config" slot has now a component "warner"which is + * an object with a "warn" method. 
This is used to warn about characters + * that cannot be represented in the Latin 1 alphabet. + * Furthermore, there is a new component "debugging_mode". + * + * Revision 1.1 1999/08/10 00:35:52 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB new file mode 100644 index 000000000..d942e2786 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB @@ -0,0 +1,52 @@ +****************************************************************************** +ABOUT-FINDLIB - Package manager for O'Caml +****************************************************************************** + + +============================================================================== +Abstract +============================================================================== + +The findlib library provides a scheme to manage reusable software components +(packages), and includes tools that support this scheme. Packages are +collections of OCaml modules for which metainformation can be stored. The +packages are kept in the filesystem hierarchy, but with strict directory +structure. The library contains functions to look the directory up that stores +a package, to query metainformation about a package, and to retrieve dependency +information about multiple packages. There is also a tool that allows the user +to enter queries on the command-line. In order to simplify compilation and +linkage, there are new frontends of the various OCaml compilers that can +directly deal with packages. + +Together with the packages metainformation is stored. This includes a version +string, the archives the package consists of, and additional linker options. +Packages can also be dependent on other packages. There is a query which finds +out all predecessors of a list of packages and sorts them topologically. The +new compiler frontends do this implicitly. + +Metainformation can be conditional, i.e. depend on a set of predicates. 
This is +mainly used to be able to react on certain properties of the environment, such +as if the bytecode or the native compiler is invoked, if the application is +multi-threaded, and a few more. If the new compiler frontends are used, most +predicates are found out automatically. + +There is special support for scripts. A new directive, "#require", loads +packages into scripts. Of course, this works only with newly created toploops +which include the findlib library. + +============================================================================== +Where to get findlib +============================================================================== + +The manual of findlib is available online [1]. You can download findlib here +[2]. + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/documentation/findlib/ + +[2] see http://www.ocaml-programming.de/packages/findlib-0.3.1.tar.gz + + + diff --git a/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml new file mode 100644 index 000000000..d1dc5b04e --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/ABOUT-FINDLIB.xml @@ -0,0 +1,61 @@ + + +%common; + +findlib"> +Findlib"> + +]> + + + + Abstract +

+The &f; library provides a scheme to manage reusable software +components (packages), and includes tools that support this +scheme. Packages are collections of OCaml modules for which +metainformation can be stored. The packages are kept in the filesystem +hierarchy, but with a strict directory structure. The library contains +functions to look up the directory that stores a package, to query +metainformation about a package, and to retrieve dependency +information about multiple packages. There is also a tool that allows +the user to enter queries on the command-line. In order to simplify +compilation and linkage, there are new frontends of the various OCaml +compilers that can directly deal with packages. +

+ +

+Metainformation is stored together with the packages. This includes a +version string, the archives the package consists of, and additional +linker options. Packages can also depend on other +packages. There is a query which finds out all predecessors of a list +of packages and sorts them topologically. The new compiler frontends +do this implicitly. +

+ +

+Metainformation can be conditional, i.e. depend on a set of +predicates. This is mainly used to be able to react to certain +properties of the environment, such as whether the bytecode or the native +compiler is invoked, whether the application is multi-threaded, and a few +more. If the new compiler frontends are used, most predicates are +determined automatically. +

+ +

+There is special support for scripts. A new directive, "#require", +loads packages into scripts. Of course, this works only with newly +created toploops which include the &f; library. +

+ +
+ + Where to get findlib +

+The manual of &f; is available online. +You can download &f; here. +

+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS new file mode 100644 index 000000000..a95683910 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS @@ -0,0 +1,50 @@ +****************************************************************************** +Extensions of the XML specification +****************************************************************************** + + +============================================================================== +This document +============================================================================== + +This parser has some options extending the XML specification. Here, the options +are explained. + +============================================================================== +Optional declarations instead of mandatory declarations +============================================================================== + +The XML spec demands that elements, notations, and attributes must be declared. +However, there are sometimes situations where a different rule would be better: +If there is a declaration, the actual instance of the element type, notation +reference or attribute must match the pattern of the declaration; but if the +declaration is missing, a reasonable default declaration should be assumed. + +I have an example that seems to be typical: The inclusion of HTML into a meta +language. Imagine you have defined some type of "generator" or other tool +working with HTML fragments, and your document contains two types of elements: +The generating elements (with a name like "gen:xxx"), and the object elements +which are HTML. As HTML is still evolving, you do not want to declare the HTML +elements; the HTML fragments should be treated as well-formed XML fragments. In +contrast to this, the elements of the generator should be declared and +validated because you can more easily detect errors. 
+ +The following two processing instructions can be included into the DTD: + +- + + + References to unknown element types and notations no longer cause an error. + The element may contain everything, but it must be still well-formed. It may + have arbitrary attributes, and every attribute is treated as an #IMPLIED + CDATA attribute. + +- + + + References to unknown attributes inside one of the enumerated elements no + longer cause an error. Such an attribute is treated as an #IMPLIED CDATA + attribute. + If there are several "optional-attribute-declarations" PIs, they are all + interpreted (implicitly merged). + diff --git a/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml new file mode 100644 index 000000000..e64d06152 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/EXTENSIONS.xml @@ -0,0 +1,62 @@ + + +%common; + + +up'> + + +%config; + +]> + + + + + This document +

This parser has some options extending the XML specification. Here, the +options are explained. +

+
+ + + Optional declarations instead of mandatory declarations + +

The XML spec demands that elements, notations, and attributes must be +declared. However, there are sometimes situations where a different rule would +be better: If there is a declaration, the actual instance of the +element type, notation reference or attribute must match the pattern of the +declaration; but if the declaration is missing, a reasonable default declaration +should be assumed.

+ +

I have an example that seems to be typical: The inclusion of HTML into a +meta language. Imagine you have defined some type of "generator" or other tool +working with HTML fragments, and your document contains two types of elements: +The generating elements (with a name like "gen:xxx"), and the object elements +which are HTML. As HTML is still evolving, you do not want to declare the HTML +elements; the HTML fragments should be treated as well-formed XML fragments. In +contrast to this, the elements of the generator should be declared and +validated because you can more easily detect errors.

+ +

The following two processing instructions can be included into the DTD:

+
    +
  • ]]> + References to unknown element types and notations no longer cause an + error. The element may contain everything, but it must be still + well-formed. It may have arbitrary attributes, and every attribute is + treated as an #IMPLIED CDATA attribute.

    +
  • +
  • ]]> + References to unknown attributes inside one of the enumerated elements + no longer cause an error. Such an attribute is treated as an #IMPLIED + CDATA attribute. +

    + +

    If there are several "optional-attribute-declarations" PIs, they are all +interpreted (implicitly merged).

    +
  • +
+
+
diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL b/helm/DEVEL/pxp/pxp/doc/INSTALL new file mode 100644 index 000000000..9a49a2217 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/INSTALL @@ -0,0 +1,154 @@ +****************************************************************************** +INSTALL - PXP, the XML parser for O'Caml +****************************************************************************** + + +============================================================================== +The "pxp" package +============================================================================== + +------------------------------------------------------------------------------ +Prerequisites +------------------------------------------------------------------------------ + +PXP requires that the netstring package [1] is already installed. PXP works +only with O'Caml 3.00 (the support for 2.04 has been dropped). The installation +procedure defined in the Makefile requires findlib [2] to work [3]. + +------------------------------------------------------------------------------ +Configuration +------------------------------------------------------------------------------ + +It is not necessary to configure PXP; but you can switch off the UTF-8 support +by setting the variable + +UTF8_SUPPORT = no + +in Makefile.conf. In this case, the UTF-8 modules are not even compiled. - By +default, the UTF-8 support is enabled. + +Note: Compiling the UTF-8 modules lasts 10 minutes on my 400 Mhz Pentium II; if +this is too long, you can set UTF8_SUPPORT to "no". + +------------------------------------------------------------------------------ +Compilation +------------------------------------------------------------------------------ + +The Makefile defines the following goals: + +- make all + compiles with the bytecode compiler and creates the files pxp_types.cma, + pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, and pxp_utf8.cmo + (*). 
The (*) files are not built if the UTF-8 support is switched off. + +- make opt + compiles with the native compiler and creates the files pxp_types.cmxa, + pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, and + pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support is + switched off. + +------------------------------------------------------------------------------ +Installation +------------------------------------------------------------------------------ + +The Makefile defines the following goals: + +- make install + installs the bytecode archives, the interface definitions, and if present, + the native archives in the default location of findlib as package "pxp" + +- make uninstall + removes the package "pxp" + +- make markup-install + installs the Markup compatibility API as package "markup" + +- make markup-uninstall + removes the package "markup" + +------------------------------------------------------------------------------ +Usage with the help of "findlib" +------------------------------------------------------------------------------ + +You can refer to the parser as the findlib package "pxp": + +ocamlfind ocamlc -package pxp ... + +By default, the UTF-8 support modules will be linked in. If you do not need +them, you may define the predicate "pxp_without_utf8", which causes that the +UTF-8 relevant parts are not linked with your program; the difference in size +is about 1 MB: + +ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ... + +Note that you can also reduce the size of the resulting executable by +specifying Netstring-related predicates (e.g. netstring_only_iso); see the +documentation of Netstring. + +------------------------------------------------------------------------------ +Linking with the archives directly +------------------------------------------------------------------------------ + +If you need UTF-8 support, you must link your program as follows: + +ocamlc ... 
pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma + pxp_engine.cma pxp_utf8.cmo ... + +If you do not need UTF-8, the following suffices: + +ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ... + + + +============================================================================== +The examples +============================================================================== + +In the "examples" directory you find several applications of PXP. They require +that PXP has been installed using findlib. See the Makefiles in the directories +for descriptions of "make" goals. + +============================================================================== +Trouble shooting +============================================================================== + +------------------------------------------------------------------------------ +Solaris +------------------------------------------------------------------------------ + +The "make" utility of Solaris does not work properly enough; there is a bug in +it that prevents the so-called suffix rules from being recognized. There are +two solutions: + +- Install GNU make and use it instead of Solaris make. This is the recommended + way to solve the problem, as GNU make can process almost every Makefile from + open source projects, and you will never have problems with building + software again. + +- Add the following lines to Makefile.code: + + %.cmx: %.ml + $(OCAMLOPT) -c $< + + %.cmo: %.ml + $(OCAMLC) -c $< + + %.cmi: %.mli + $(OCAMLC) -c $< + + %.ml: %.mll + ocamllex $< + + + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/documentation/netstring + +[2] see http://www.ocaml-programming.de/packages/documentation/findlib/ + +[3] Findlib is a package manager, see the file ABOUT-FINDLIB. 
+ + + diff --git a/helm/DEVEL/pxp/pxp/doc/INSTALL.xml b/helm/DEVEL/pxp/pxp/doc/INSTALL.xml new file mode 100644 index 000000000..ac7832dbb --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/INSTALL.xml @@ -0,0 +1,171 @@ + + +%common; + +PXP"> + +]> + + + The "pxp" package + Prerequisites +

+&m; requires that the netstring package + is already installed. &m; works +only with O'Caml 3.00 (the support for 2.04 has been dropped). +The installation +procedure defined in the Makefile requires findlib to workFindlib is a +package manager, see the file ABOUT-FINDLIB.. +

+
+ + Configuration +

+It is not necessary to configure PXP; but you can switch off the UTF-8 +support by setting the variable + + +UTF8_SUPPORT = no + + +in Makefile.conf. In this case, the UTF-8 modules are not even compiled. +- By default, the UTF-8 support is enabled. +

+ +

+Note: Compiling the UTF-8 modules takes 10 minutes on my 400 MHz Pentium II; +if this is too long, you can set UTF8_SUPPORT to "no".

+
+ + Compilation +

+The Makefile defines the following goals: +

+
    +
  • +

    make all

    +

    compiles with the bytecode compiler and creates the files +pxp_types.cma, pxp_lex_iso88591.cma, pxp_lex_utf8.cma (*), pxp_engine.cma, +and pxp_utf8.cmo (*). The (*) files are not built if the UTF-8 support +is switched off.

    +
  • +
  • +

    make opt

    +

    compiles with the native compiler and creates the files +pxp_types.cmxa, pxp_lex_iso88591.cmxa, pxp_lex_utf8.cmxa (*), pxp_engine.cmxa, +and pxp_utf8.cmx (*). The (*) files are not built if the UTF-8 support +is switched off.

    +
  • +
+
+ + Installation +

+The Makefile defines the following goals:

+
    +
  • +

    make install

    +

    installs the bytecode archives, the interface definitions, and if +present, the native archives in the default location of findlib as +package "pxp" +

    +
  • +
  • +

    make uninstall

    +

    removes the package "pxp"

    +
  • +
  • +

    make markup-install

    +

    installs the Markup compatibility API as package "markup"

    +
  • +
  • +

    make markup-uninstall

    +

    removes the package "markup"

    +
  • +
+
+ + + Usage with the help of "findlib" +

You can refer to the parser as the findlib package "pxp": + + +ocamlfind ocamlc -package pxp ... + + +By default, the UTF-8 support modules will be linked in. If you do not need +them, you may define the predicate "pxp_without_utf8", which prevents the +UTF-8-related parts from being linked with your program; the difference in size +is about 1 MB: + + +ocamlfind ocamlc -package pxp -predicates pxp_without_utf8 ... + + +Note that you can also reduce the size of the resulting executable by +specifying Netstring-related predicates (e.g. netstring_only_iso); see the +documentation of Netstring. +

+
+ + + Linking with the archives directly +

If you need UTF-8 support, you must link your program as follows: + + +ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_lex_utf8.cma + pxp_engine.cma pxp_utf8.cmo ... + + +If you do not need UTF-8, the following suffices: + + +ocamlc ... pxp_types.cma pxp_lex_iso88591.cma pxp_engine.cma ... + + +

+
+ +
+ + The examples +

+In the "examples" directory you find several applications of &m;. They require +that &m; has been installed using findlib. See the Makefiles in the +directories for descriptions of "make" goals. +

+
+ + Trouble shooting + Solaris +

+The "make" utility of Solaris does not work properly; there is a bug +in it that prevents the so-called suffix rules from being recognized. There +are two solutions:

+
    +
  • Install GNU make and use it instead of Solaris make. This is +the recommended way to solve the problem, as GNU make can process almost +every Makefile from open source projects, and you will never have problems +with building software again.

  • +
  • Add the following lines to Makefile.code: + +%.cmx: %.ml + $(OCAMLOPT) -c $< + +%.cmo: %.ml + $(OCAMLC) -c $< + +%.cmi: %.mli + $(OCAMLC) -c $< + +%.ml: %.mll + ocamllex $< + +

  • +
+
+
+
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/Makefile b/helm/DEVEL/pxp/pxp/doc/Makefile new file mode 100644 index 000000000..0ed12741c --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/Makefile @@ -0,0 +1,43 @@ +.PHONY: all +all: README INSTALL ABOUT-FINDLIB SPEC PRERELEASE EXTENSIONS + +README: README.xml common.xml config.xml + readme -text README.xml >README + +INSTALL: INSTALL.xml common.xml config.xml + readme -text INSTALL.xml >INSTALL + +ABOUT-FINDLIB: ABOUT-FINDLIB.xml common.xml config.xml + readme -text ABOUT-FINDLIB.xml >ABOUT-FINDLIB + +SPEC: SPEC.xml common.xml config.xml + readme -text SPEC.xml >SPEC + +EXTENSIONS: EXTENSIONS.xml common.xml config.xml + readme -text EXTENSIONS.xml >EXTENSIONS + +PRERELEASE: PRERELEASE.xml common.xml config.xml + readme -text PRERELEASE.xml >PRERELEASE + +config.xml: + touch config.xml + +common.xml: + ln -s dist-common.xml common.xml + +.PHONY: clean +clean: + +.PHONY: CLEAN +CLEAN: clean + $(MAKE) -C manual CLEAN + +.PHONY: distclean +distclean: clean + rm -f *~ + $(MAKE) -C manual distclean + +.PHONY: symlinks +symlinks: + ln -s ../examples/readme/readme.dtd . + diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE b/helm/DEVEL/pxp/pxp/doc/PRERELEASE new file mode 100644 index 000000000..bc46cd059 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/PRERELEASE @@ -0,0 +1,103 @@ +****************************************************************************** +README - PXP, the XML parser for O'Caml +****************************************************************************** + + +============================================================================== +Pre-release of PXP, the XML parser for O'Caml +============================================================================== + +PXP is the new, completely revised and partly rewritten validating XML parser +for O'Caml; the old name, "Markup", has been dropped. 
The current version of +PXP is still a bit experimental because it is not fully tested; however, it is +now stable enough to be used in experimental applications. + +PXP will retain most parts of Markup's API; the name PXP emphasizes the +strengths of the API: it is the Polymorphic XML Parser. The document objects +representing the parsed file have an interesting polymorphism which allows +the user of the parser to control which kind of objects are actually created. +The current API supports the element type as criterion for object/class +selection; future APIs will extend this concept such that arbitrary criteria +are possible (e.g. you may want to have different classes for different +namespaces). + +The current development goals of PXP are: + +- Full XML-1.0 conformance: The current pre-release is now very close to + strict XML-1.0 conformance. The only bigger difference to the standard is + that PXP sometimes accepts DTDs as legal while the standard forbids them + (non-deterministic content models). + One of the more important improvements since 0.2.10 is the possibility to + represent XML documents internally as UTF-8 strings, not only as ISO-8859-1 + strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer + preprocessor hiding the details of the UTF-8 encoding in the lexer + definitions. + +- Correctness of validation: The well-formedness and validity constraints + must be implemented as correctly as possible. The last stable release had + already a regression test covering many aspects of XML. The test suite will + be extended. + +- Parsing performance: It should be possible to process large amounts of data + in a reasonable period of time. The last stable release had many stages of + processing that wasted time. + The current pre-release is already 30 per cent faster than 0.2.10. + +- Simplicity of usage: Unlike parsers based on imperative languages and DOM, + the usage of PXP should be simple, even for complex tasks.
The current + parser API has already many advantages over DOM; especially it is well + integrated into the functional and object-oriented language O'Caml. You do + not have to deal with artificial representations like "node lists" while the + programming environment already provides good support for list structures. + The fact that O'Caml allows a functional programming style is interesting + for programs transforming XML trees. + +============================================================================== +Download the PXP pre-release +============================================================================== + +The current pre-release is available under +http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz [1]. There is +currently no documentation for this version of the software; it is recommended +to use the Markup manual [2] and compare it with the current module interfaces. + +Please note that this is work in progress; it may still contain bugs and +irregularities. + +The parser works only with OCaml-3. The parser needs the netstring package [3], +at least version 0.9.1. + +I am very interested in your opinion to PXP; please contact me [4]. + +============================================================================== +Author, Credits, Copying +============================================================================== + +PXP has been written by Gerd Stolpmann [5]; it contains contributions by +Claudio Sacerdoti Coen. You may copy it as you like, you may use it even for +commercial purposes as long as the license conditions are respected, see the +file LICENSE coming with the distribution. It allows almost everything. + +============================================================================== +Where to find the stable release +============================================================================== + +Here. 
[6] + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/pxp-pre-0.99.8.tar.gz + +[2] see http://www.ocaml-programming.de/packages/documentation/markup/manual + +[3] see http://www.ocaml-programming.de/packages/documentation/netstring + +[4] see mailto:gerd@gerd-stolpmann.de + +[5] see mailto:gerd@gerd-stolpmann.de + +[6] see http://www.ocaml-programming.de/packages/documentation/markup + + + diff --git a/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml b/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml new file mode 100644 index 000000000..f155abd96 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/PRERELEASE.xml @@ -0,0 +1,116 @@ + + +%common; + + +up'> + + +%config; + +]> + + + + Pre-release of PXP, the XML parser for O'Caml + +

PXP is the new, completely revised and partly rewritten +validating XML parser +for O'Caml; the old name, "Markup", has been dropped. The current version +of PXP is still a bit experimental because it is not fully tested; however, +it is now stable enough to be used in experimental applications. +

+ +

PXP will retain most parts of Markup's API; the name PXP +emphasizes the strengths of the API: it is the Polymorphic XML Parser. +The document objects representing the parsed file have an interesting +polymorphism which allows the user of the parser to control +which kind of objects are actually created. The current API supports +the element type as criterion for object/class selection; future APIs will +extend this concept such that arbitrary criteria are possible +(e.g. you may want to have different classes for different namespaces). +

+ +

The current development goals of PXP are:

+ +
    +
  • Full XML-1.0 conformance: The current pre-release +is now very close to strict XML-1.0 conformance. The only bigger +difference to the standard is that PXP sometimes accepts DTDs as legal +while the standard forbids them (non-deterministic content models).

    + +

    One of the more important improvements since 0.2.10 is the possibility to +represent XML documents internally as UTF-8 strings, not only as ISO-8859-1 +strings. Thanks to Claudio Sacerdoti Coen who contributed a special lexer +preprocessor hiding the details of the UTF-8 encoding in the lexer definitions. +

    +
  • + +
  • Correctness of validation: The well-formedness +and validity constraints must be implemented as correctly as possible. +The last stable release already had a regression test covering many +aspects of XML. The test suite will be extended.

    +
  • + +
  • Parsing performance: It should be possible to +process large amounts of data in a reasonable period of time. The last +stable release had many stages of processing that wasted time.

    + +

    The current pre-release is already 30 per cent faster than +0.2.10.

    +
  • + +
  • Simplicity of usage: Unlike parsers based on +imperative languages and DOM, the usage of PXP should be simple, even +for complex tasks. The current parser API already has many advantages +over DOM; especially it is well integrated into the functional and +object-oriented language O'Caml. You do not have to deal with +artificial representations like "node lists" while the programming +environment already provides good support for list structures. The +fact that O'Caml allows a functional programming style is interesting +for programs transforming XML trees.

    +
  • +
+
+ + + Download the PXP pre-release + +

The current pre-release is available under + +&url.gps-ocaml-download;/pxp-pre-0.99.8.tar.gz. There is currently no +documentation for this version of the software; it is recommended to use the Markup manual and compare it with the current +module interfaces.

+ +

Please note that this is work in progress; it may still contain bugs +and irregularities.

+ +

The parser works only with OCaml-3. The parser needs the netstring package, at least version 0.9.1. +

+ +

I am very interested in your opinion of PXP; please contact me.

+
+ + + Author, Credits, Copying +

+PXP has been written by &person.gps;; it contains contributions by +Claudio Sacerdoti Coen. You may copy it as you like, +you may use it even for commercial purposes as long as the license conditions +are respected, see the file LICENSE coming with the distribution. It allows +almost everything. +

+
+ + + Where to find the stable release +

Here.

+
+ +
+ diff --git a/helm/DEVEL/pxp/pxp/doc/README b/helm/DEVEL/pxp/pxp/doc/README new file mode 100644 index 000000000..b7ad5de59 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/README @@ -0,0 +1,247 @@ +****************************************************************************** +README - PXP, the XML parser for O'Caml +****************************************************************************** + + +============================================================================== +Abstract +============================================================================== + +PXP is a validating parser for XML-1.0 which has been written entirely in +Objective Caml. + +PXP is the new name of the parser formerly known as "Markup". PXP means +"Polymorphic XML parser" and emphasizes its most useful property: that the API +is polymorphic and can be configured such that different objects are used to +store different types of elements. + +============================================================================== +Download +============================================================================== + +You can download PXP as gzip'ed tarball [1]. The parser needs the Netstring [2] +package (0.9.3). Note that PXP requires O'Caml 3.00. + +============================================================================== +User's Manual +============================================================================== + +The manual is included in the distribution both as Postscript document and +bunch of HTML files. An online version can be found here [3]. + +============================================================================== +Author, Credits, Copying +============================================================================== + +PXP has been written by Gerd Stolpmann [4]; it contains contributions by +Claudio Sacerdoti Coen. 
You may copy it as you like, you may use it even for +commercial purposes as long as the license conditions are respected, see the +file LICENSE coming with the distribution. It allows almost everything. + +Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug reports. + +============================================================================== +Description +============================================================================== + +PXP is a validating XML parser for O'Caml [5]. It strictly complies to the +XML-1.0 [6] standard. + +The parser is simple to call, usually only one statement (function call) is +sufficient to parse an XML document and to represent it as object tree. + +Once the document is parsed, it can be accessed using a class interface. The +interface allows arbitrary access including transformations. One of the +features of the document representation is its polymorphic nature; it is simple +to add custom methods to the document classes. Furthermore, the parser can be +configured such that different XML elements are represented by objects created +from different classes. This is a very powerful feature, because it simplifies +the structure of programs processing XML documents. + +Note that the class interface does not comply to the DOM standard. It was not a +development goal to realize a standard API (industrial developers can this much +better than I); however, the API is powerful enough to be considered as +equivalent with DOM. More important, the interface is compatible with the XML +information model required by many XML-related standards. + +------------------------------------------------------------------------------ +Detailed feature list +------------------------------------------------------------------------------ + +- The XML instance is validated against the DTD; any violation of a validation + constraint leads to the rejection of the instance. 
The validator has been + carefully implemented, and conforms strictly to the standard. If needed, it + is also possible to run the parser in a well-formedness mode. + +- If possible, the validator applies a deterministic finite automaton to + validate the content models. This ensures that validation can always be + performed in linear time. However, in the case that the content models are + not deterministic, the parser uses a backtracking algorithm which can be + much slower. - It is also possible to reject non-deterministic content + models. + +- In particular, the validator also checks the complicated rules whether + parentheses are properly nested with respect to entities, and whether the + standalone declaration is satisfied. On demand, it is checked whether the + IDREF attributes only refer to existing nodes. + +- Entity references are automatically resolved while the XML text is being + scanned. It is not possible to recognize in the object tree where a + referenced entity begins or ends; the object tree only represents the + logical structure. + +- External entities are loaded using a configurable resolver infrastructure. + It is possible to connect the parser with an arbitrary XML source. + +- The parser can read XML text encoded in a variety of character sets. + Independent of this, it is possible to choose the encoding of the internal + representation of the tree nodes; the parser automatically converts the + input text to this encoding. Currently, the parser supports UTF-8 and + ISO-8859-1 as internal encodings. + +- The interface of the parser has been designed such that it is best + integrated into the language O'Caml. The first goal was simplicity of usage + which is achieved by many convenience methods and functions, and by allowing + the user to select which parts of the XML text are actually represented in + the tree. 
For example, it is possible to store processing instructions as + tree nodes, but the parser can also be configured such that these + instructions are put into hashtables. The information model is compatible + with the requirements of XML-related standards such as XPath. + +- In particular, the node tree can optionally contain or leave out processing + instructions and comments. It is also possible to generate a "super root" + object which is the parent of the root element. The attributes of elements + are normally not stored as nodes, but it is possible to get them wrapped + into nodes. + +- There is also an interface for DTDs; you can parse and access sequences of + declarations. The declarations are fully represented as recursive O'Caml + values. + +------------------------------------------------------------------------------ +Code examples +------------------------------------------------------------------------------ + +This distribution contains several examples: + +- validate: simply parses a document and prints all error messages + +- readme: Defines a DTD for simple "README"-like documents, and offers + conversion to HTML and text files [7]. + +- xmlforms: This is already a sophisticated application that uses XML as style + sheet language and data storage format. It shows how a Tk user interface can + be configured by an XML style, and how data records can be stored using XML. + +------------------------------------------------------------------------------ +Restrictions and missing features +------------------------------------------------------------------------------ + +The following restrictions apply that are not violations of the standard: + +- The attributes "xml:space", and "xml:lang" are not supported specially. (The + application can do this.) + +- The built-in support for SYSTEM and PUBLIC identifiers is limited to local + file access. There is no support for catalogs. The parser offers a hook to + add missing features. 
+ +- It is currently not possible to check for interoperatibility with SGML. + +The following features are also missing: + +- There is no special support for namespaces. (Perhaps in the next release?) + +- There is no support for XPATH or XSLT. + +However, I hope that these features will be implemented soon, either by myself +or by contributors (who are invited to do so). + +------------------------------------------------------------------------------ +Recent Changes +------------------------------------------------------------------------------ + +- Changed in 1.0: + Support for document order. + +- Changed in 0.99.8: + Several fixes of bugs reported by Haruo Hosoya and Alain Frisch. + The class type "node" has been extended: you can go directly to the next and + previous nodes in the list; you can refer to nodes by position. + There are now some iterators for nodes: find, find_all, find_element, + find_all_elements, map_tree, iter_tree. + Experimental support for viewing attributes as nodes; I hope that helps + Alain writing his XPath evaluator. + The user's manual has been revised and is almost up to date. + +- Changed in 0.99.7: + There are now additional node types T_super_root, T_pinstr and T_comment, + and the parser is able to create the corresponding nodes. + The functions for character set conversion have been moved to the Netstring + package; they are not specific for XML. + +- Changed in 0.99.6: + Implemented a check on deterministic content models. Added an alternate + validator basing on a DFA. - This means that now all mandatory features for + an XML-1.0 parser are implemented! The parser is now substantially complete. + +- Changed in 0.99.5: + The handling of ID and IDREF attributes has changed. The index of nodes + containing an ID attribute is now separated from the document. Optionally + the parser now checks whether the IDREF attributes refer to existing + elements. + The element nodes can optionally store the location in the source XML code. 
+ The method 'write' writes the XML tree in every supported encoding. + (Successor of 'write_compact_as_latin1'.) + Several smaller changes and fixes. + +- Changed in 0.99.4: + The module Pxp_reader has been modernized. The resolver classes are simpler + to use. There is now support for URLs. + The interface of Pxp_yacc has been improved: The type 'source' is now + simpler. The type 'domspec' has gone; the new 'spec' is opaque and performs + better. There are some new parsing modes. + Many smaller changes. + +- Changed in 0.99.3: + The markup_* modules have been renamed to pxp_*. There is a new + compatibility API that tries to be compatible with markup-0.2.10. + The type "encoding" is now a polymorphic variant. + +- Changed in 0.99.2: + Added checks for the constraints about the standalone declaration. + Added regression tests about attribute normalization, attribute checks, + standalone checks. + Fixed some minor errors of the attribute normalization function. + The bytecode/native archives are now separated in a general part, in a + ISO-8859-1-relevant part, and a UTF-8-relevant part. The parser can again be + compiled with ocamlopt. + +- Changed in 0.99.1: + In general, this release is an early pre-release of the next stable version + 1.00. I do not recommend to use it for serious work; it is still very + experimental! + The core of the parser has been rewritten using a self-written parser + generator. + The lexer has been restructured, and can now handle UTF-8 encoded files. + Numerous other changes. + + +-------------------------- + +[1] see http://www.ocaml-programming.de/packages/pxp-1.0.tar.gz + +[2] see http://www.ocaml-programming.de/packages/documentation/netstring + +[3] see http://www.ocaml-programming.de/packages/documentation/pxp/manual + +[4] see mailto:gerd@gerd-stolpmann.de + +[5] see http://caml.inria.fr/ + +[6] see http://www.w3.org/TR/1998/REC-xml-19980210.html + +[7] This particular document is an example of this DTD! 
+ + + diff --git a/helm/DEVEL/pxp/pxp/doc/README.xml b/helm/DEVEL/pxp/pxp/doc/README.xml new file mode 100644 index 000000000..34c7726ad --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/README.xml @@ -0,0 +1,423 @@ + + + + + + +Gerd Stolpmann'> +--> + + +%common; + + +up'> + + +%config; + +]> + + + + Abstract +

+PXP is a validating parser for XML-1.0 which has been written +entirely in Objective Caml. +

+ +

PXP is the new name of the parser formerly known as "Markup". +PXP means "Polymorphic XML parser" and emphasizes its most useful +property: that the API is polymorphic and can be configured such that +different objects are used to store different types of elements.

+
+ + + Download +

+You can download PXP as gzip'ed tarball. The parser needs the Netstring package (0.9.3). Note that PXP +requires O'Caml 3.00. +

+
+ + + User's Manual +

+The manual is included in the distribution both as Postscript document and +bunch of HTML files. An online version can be found here. +

+
+ + + Author, Credits, Copying +

+PXP has been written by &person.gps;; it contains contributions by +Claudio Sacerdoti Coen. You may copy it as you like, +you may use it even for commercial purposes as long as the license conditions +are respected, see the file LICENSE coming with the distribution. It allows +almost everything. +

+ +

Thanks also to Alain Frisch and Haruo Hosoya for discussions and bug +reports.

+
+ + + Description +

+PXP is a validating XML parser for O'Caml. It strictly complies with the +XML-1.0 standard. +

+ +

The parser is simple to call, usually only one statement (function +call) is sufficient to parse an XML document and to represent it as object +tree.

+ +

+Once the document is parsed, it can be accessed using a class interface. +The interface allows arbitrary access including transformations. One of +the features of the document representation is its polymorphic nature; +it is simple to add custom methods to the document classes. Furthermore, +the parser can be configured such that different XML elements are represented +by objects created from different classes. This is a very powerful feature, +because it simplifies the structure of programs processing XML documents. +

+ +

+Note that the class interface does not comply with the DOM standard. It was not a +development goal to realize a standard API (industrial developers can do this much +better than I); however, the API is powerful enough to be considered +equivalent to DOM. More importantly, the interface is compatible with the +XML information model required by many XML-related standards. +

+ + + Detailed feature list + +
    +
  • The XML instance is validated against the DTD; any violation of +a validation constraint leads to the rejection of the instance. The validator +has been carefully implemented, and conforms strictly to the standard. If +needed, it is also possible to run the parser in a well-formedness mode.

    +
  • +
  • If possible, the validator applies a deterministic finite +automaton to validate the content models. This ensures that validation can +always be performed in linear time. However, in the case that the content +models are not deterministic, the parser uses a backtracking algorithm which +can be much slower. - It is also possible to reject non-deterministic content +models.

    +
  • +
  • In particular, the validator also checks the complicated rules +whether parentheses are properly nested with respect to entities, and whether +the standalone declaration is satisfied. On demand, it is checked whether the +IDREF attributes only refer to existing nodes.

    +
  • +
  • Entity references are automatically resolved while the XML text +is being scanned. It is not possible to recognize in the object tree where a +referenced entity begins or ends; the object tree only represents the logical structure.

    +
  • +
  • External entities are loaded using a configurable resolver +infrastructure. It is possible to connect the parser with an arbitrary XML source.

    +
  • +
  • The parser can read XML text encoded in a variety of character +sets. Independent of this, it is possible to choose the encoding of the +internal representation of the tree nodes; the parser automatically converts +the input text to this encoding. Currently, the parser supports UTF-8 and +ISO-8859-1 as internal encodings.

    +
  • +
  • The interface of the parser has been designed such that it is +best integrated into the language O'Caml. The first goal was simplicity of +usage which is achieved by many convenience methods and functions, and by +allowing the user to select which parts of the XML text are actually +represented in the tree. For example, it is possible to store processing +instructions as tree nodes, but the parser can also be configured such that +these instructions are put into hashtables. The information model is compatible +with the requirements of XML-related standards such as XPath.

    +
  • +
  • In particular, the node tree can optionally contain or leave out +processing instructions and comments. It is also possible to generate a "super +root" object which is the parent of the root element. The attributes of +elements are normally not stored as nodes, but it is possible to get them +wrapped into nodes.

    +
  • +
  • There is also an interface for DTDs; you can parse and access +sequences of declarations. The declarations are fully represented as recursive +O'Caml values. +

    +
  • +
+
+ + + + Code examples +

+This distribution contains several examples:

+
    +
  • +validate: simply parses a +document and prints all error messages +

  • + +
  • +readme: Defines a DTD for simple "README"-like documents, and offers +conversion to HTML and text filesThis particular document is an +example of this DTD!. +

  • + +
  • +xmlforms: This is already a +sophisticated application that uses XML as style sheet language and data +storage format. It shows how a Tk user interface can be configured by an +XML style, and how data records can be stored using XML. +

  • +
+
+ + + Restrictions and missing features +

+The following restrictions apply that are not violations of the standard: +

+
    +
  • +The attributes "xml:space", and "xml:lang" are not supported specially. + (The application can do this.)

  • + +
  • +The built-in support for SYSTEM and PUBLIC identifiers is limited to + local file access. There is no support for catalogs. The parser offers + a hook to add missing features.

  • + +
  • +It is currently not possible to check for interoperability with SGML. +

  • +
+ +

The following features are also missing:

+
    +
  • There is no special support for namespaces. (Perhaps in the next release?)

    +
  • +
  • There is no support for XPATH or XSLT.

    +
  • +
+

However, I hope that these features will be implemented soon, either by +myself or by contributors (who are invited to do so).

+
+ + + Recent Changes +
    +
  • +

    Changed in 1.0:

    +

    Support for document order.

    +
  • +
  • +

    Changed in 0.99.8:

    +

    Several fixes of bugs reported by Haruo Hosoya and Alain +Frisch.

    +

    The class type "node" has been extended: you can go directly to +the next and previous nodes in the list; you can refer to nodes by +position.

    +

    There are now some iterators for nodes: find, find_all, +find_element, find_all_elements, map_tree, iter_tree.

    +

    Experimental support for viewing attributes as nodes; I hope that +helps Alain writing his XPath evaluator.

    +

    The user's manual has been revised and is almost up to date.

    +
  • +
  • +

    Changed in 0.99.7:

    +

    There are now additional node types T_super_root, T_pinstr and +T_comment, and the parser is able to create the corresponding nodes.

    +

    The functions for character set conversion have been moved to +the Netstring package; they are not specific for XML.

    +
  • +
  • +

    Changed in 0.99.6:

    +

    Implemented a check on deterministic content models. Added +an alternate validator based on a DFA. - This means that now all mandatory +features for an XML-1.0 parser are implemented! The parser is now substantially +complete.

    +
  • +
  • +

    Changed in 0.99.5:

    +

    The handling of ID and IDREF attributes has changed. The +index of nodes containing an ID attribute is now separated from the document. +Optionally the parser now checks whether the IDREF attributes refer to +existing elements.

    +

    The element nodes can optionally store the location in the +source XML code.

    +

    The method 'write' writes the XML tree in every supported +encoding. (Successor of 'write_compact_as_latin1'.)

    +

    Several smaller changes and fixes.

    +
  • +
  • +

    Changed in 0.99.4:

    +

    The module Pxp_reader has been modernized. The resolver classes +are simpler to use. There is now support for URLs.

    +

    The interface of Pxp_yacc has been improved: The type 'source' +is now simpler. The type 'domspec' has gone; the new 'spec' is opaque and +performs better. There are some new parsing modes.

    +

    Many smaller changes.

    +
  • +
  • +

    Changed in 0.99.3:

    +

    The markup_* modules have been renamed to pxp_*. There is a new +compatibility API that tries to be compatible with markup-0.2.10.

    +

    The type "encoding" is now a polymorphic variant.

    +
  • +
  • +

    Changed in 0.99.2:

    +

    Added checks for the constraints about the standalone +declaration.

    +

    Added regression tests about attribute normalization, +attribute checks, standalone checks.

    +

    Fixed some minor errors of the attribute normalization +function.

    +

    The bytecode/native archives are now separated into +a general part, an ISO-8859-1-relevant part, and a UTF-8-relevant +part. The parser can again be compiled with ocamlopt.

    +
  • +
  • +

    Changed in 0.99.1:

    +

    In general, this release is an early pre-release of the +next stable version 1.00. I do not recommend using it for serious +work; it is still very experimental!

    +

    The core of the parser has been rewritten using a self-written +parser generator.

    +

    The lexer has been restructured, and can now handle UTF-8 +encoded files.

    +

    Numerous other changes.

    +
  • + + +
+
+
+
+ diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC b/helm/DEVEL/pxp/pxp/doc/SPEC new file mode 100644 index 000000000..28e6914ce --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/SPEC @@ -0,0 +1,185 @@ +****************************************************************************** +Notes on the XML specification +****************************************************************************** + + +============================================================================== +This document +============================================================================== + +There are some points in the XML specification which are ambiguous. The +following notes discuss these points, and describe how this parser behaves. + +============================================================================== +Conditional sections and the token ]]> +============================================================================== + +It is unclear what happens if an ignored section contains the token ]]> at +places where it is normally allowed, i.e. within string literals and comments, +e.g. + + --> ]]> + +On the one hand, the production rule of the XML grammar does not treat such +tokens specially. Following the grammar, already the first ]]> ends the +conditional section + + + +and the other tokens are included into the DTD. + +On the other hand, we can read: "Like the internal and external DTD subsets, a +conditional section may contain one or more complete declarations, comments, +processing instructions, or nested conditional sections, intermingled with +white space" (XML 1.0 spec, section 3.4). Complete declarations and comments +may contain ]]>, so this is contradictory to the grammar. + +The intention of conditional sections is to include or exclude the section +depending on the current replacement text of a parameter entity. 
Almost always +such sections are used as in + + (or "IGNORE") + + +This means that if it is possible to include a section it must also be legal to +ignore the same section. This is a strong indication that the token ]]> must +not count as section terminator if it occurs in a string literal or comment. + +This parser implements the latter. + +============================================================================== +Conditional sections and the inclusion of parameter entities +============================================================================== + +It is unclear what happens if an ignored section contains a reference to a +parameter entity. In most cases, this is not problematic because nesting of +parameter entities must respect declaration braces. The replacement text of +parameter entities must either contain a whole number of declarations or only +inner material of one declaration. Almost always it does not matter whether +these references are resolved or not (the section is ignored). + +But there is one case which is not explicitly specified: Is it allowed that the +replacement text of an entity contains the end marker ]]> of an ignored +conditional section? Example: + +"> + must be contained in +the same entity as the corresponding of +declarations). So it is possible to conclude that ]]> may be in another entity. + +Of course, there are many arguments not to allow such constructs: The resulting +code is incomprehensive, and parsing takes longer (especially if the entities +are external). I think the best argument against this kind of XML is that the +XML spec is not detailed enough, as it contains no rules where entity +references should be recognized and where not. For example: + +"> +"> + + +Which token ]]> counts? From a logical point of view, the ]]> in the third line +ends the conditional section. 
As already pointed out, the XML spec permits the +interpretation that ]]> is recognized even in string literals, and this may be +also true if it is "imported" from a separate entity; and so the first ]]> +denotes the end of the section. + +As a practical solution, this parser does not expand parameter entities in +ignored sections. Furthermore, it is also not allowed that the ending ]]> of +ignored or included sections is contained in a different entity than the +starting +%ext; +%ent; + +"ext" contains: + +"> + + + +Here, the reference %ent; would be illegal if the standalone declaration is +strictly interpreted. This parser handles the references %ent; and %ext; +equivalently which means that %ent; is allowed, but the element type "el" is +treated as externally declared. + +General entities can occur within the DTD, but they can only be contained in +the default value of attributes, or in the definition of other general +entities. The latter can be ignored, because the check will be repeated when +the entities are expanded. Though, general entities occuring in default +attribute values are actually checked at the moment when the default is used in +an element instance. + +General entities occuring in the document body are always checked. + +NDATA entities can occur in ENTITY attribute values; either in the element +instance or in the default declaration. Both cases are checked. + diff --git a/helm/DEVEL/pxp/pxp/doc/SPEC.xml b/helm/DEVEL/pxp/pxp/doc/SPEC.xml new file mode 100644 index 000000000..906f45a79 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/SPEC.xml @@ -0,0 +1,226 @@ + + +%common; + + +up'> + + +%config; + +]> + + + + + This document +

There are some points in the XML specification which are ambiguous. +The following notes discuss these points, and describe how this parser +behaves.

+
+ + + Conditional sections and the token ]]> + +

It is unclear what happens if an ignored section contains the +token ]]> at places where it is normally allowed, i.e. within string +literals and comments, e.g. + + +<![IGNORE[ <!-- ]]> --> ]]> + + +On the one hand, the production rule of the XML grammar does not treat such +tokens specially. Following the grammar, already the first ]]> ends +the conditional section + + +<![IGNORE[ <!-- ]]> + + +and the other tokens are included into the DTD.

+ +

On the other hand, we can read: "Like the internal and external DTD subsets, +a conditional section may contain one or more complete declarations, comments, +processing instructions, or nested conditional sections, intermingled with +white space" (XML 1.0 spec, section 3.4). Complete declarations and comments +may contain ]]>, so this is contradictory to the grammar.

+ +

The intention of conditional sections is to include or exclude the section +depending on the current replacement text of a parameter entity. Almost +always such sections are used as in + + +<!ENTITY % want.a.feature.or.not "INCLUDE"> (or "IGNORE") +<![ %want.a.feature.or.not; [ ... ]]> + + +This means that if it is possible to include a section it must also be +legal to ignore the same section. This is a strong indication that +the token ]]> must not count as section terminator if it occurs +in a string literal or comment.

+ +

This parser implements the latter.

+ +
+ + + Conditional sections and the inclusion of parameter entities + +

It is unclear what happens if an ignored section contains a reference +to a parameter entity. In most cases, this is not problematic because +nesting of parameter entities must respect declaration braces. The +replacement text of parameter entities must either contain a whole +number of declarations or only inner material of one declaration. Almost always +it does not matter whether these references are resolved or not +(the section is ignored).

+ +

But there is one case which is not explicitly specified: Is it allowed +that the replacement text of an entity contains the end marker ]]> +of an ignored conditional section? Example: + + +<!ENTITY % end "]]>"> +<![ IGNORE [ %end; + + +We do not find the statement in the XML spec that the ]]> must be contained +in the same entity as the corresponding <![ (as for the tokens <! and +> of declarations). So it is possible to conclude that ]]> may be in +another entity.

+ +

Of course, there are many arguments not to allow such constructs: The +resulting code is incomprehensible, and parsing takes longer (especially if the +entities are external). I think the best argument against this kind of XML +is that the XML spec is not detailed enough, as it contains no rules where +entity references should be recognized and where not. For example: + + +<!ENTITY % y "]]>"> +<!ENTITY % x "<!ENTITY z '<![CDATA[some text%y;'>"> +<![ IGNORE [ %x; ]]> + + +Which token ]]> counts? From a logical point of view, the ]]> in the +third line ends the conditional section. As already pointed out, the XML spec +permits the interpretation that ]]> is recognized even in string literals, +and this may be also true if it is "imported" from a separate entity; and so +the first ]]> denotes the end of the section.

+ +

As a practical solution, this parser does not expand parameter entities +in ignored sections. Furthermore, it is also not allowed that the ending ]]> +of ignored or included sections is contained in a different entity than the +starting <![ token.

+
+ + + + Standalone documents and attribute normalization + +

+If a document is declared as stand-alone, a restriction on the effect of +attribute normalization takes effect for attributes declared in external +entities. Normally, the parser knows the type of the attribute from +the ATTLIST declaration, and it can normalize attribute values depending +on their types. For example, an NMTOKEN attribute can be written with +leading or trailing spaces, but the parser always returns the nmtoken +without such added spaces; in contrast to this, a CDATA attribute is +not normalized in this way. For stand-alone documents the type information is +not available if the ATTLIST declaration is located in an external +entity. Because of this, the XML spec demands that attribute values must +be written in their normal form in this case, i.e. without additional +spaces. +

+

This parser interprets this restriction as follows. Obviously, +the substitution of character and entity references is not considered +as a "change of the value" as a result of the normalization, because +these operations will be performed identically if the ATTLIST declaration +is not available. The same applies to the substitution of TABs, CRs, +and LFs by space characters. Only the removal of spaces depending on +the type of the attribute changes the value if the ATTLIST is not +available. +

+

This means in detail: CDATA attributes never violate the +stand-alone status. ID, IDREF, NMTOKEN, ENTITY, NOTATION and enumerator +attributes must not be written with leading and/or trailing spaces. IDREFS, +ENTITIES, and NMTOKENS attributes must not be written with extra spaces at the +beginning or at the end of the value, or between the tokens of the list. +

+

The whole check is dubious, because the attribute type expresses also a +semantic constraint, not only a syntactic one. At least this parser +distinguishes strictly between single-value and list types, and returns the +attribute values differently; the first are represented as Value s (where s is +a string), the latter are represented as Valuelist [s1; s2; ...; sN]. The +internal representation of the value is dependent on the attribute type, too, +such that even normalized values are processed differently depending on +whether the attribute has list type or not. For this parser, it still makes a +difference whether a value is normalized and processed as if it were CDATA, or +whether the value is processed according to its declared type. +

+

The stand-alone check is included to be able to make a statement +whether other, well-formedness parsers can process the document. Of course, +these parsers always process attributes as CDATA, and the stand-alone check +guarantees that these parsers will always see the normalized values. +

+
+ + + Standalone documents and the restrictions on entity +references +

+Stand-alone documents must not refer to entities which are declared in an +external entity. This parser applies this rule only: to general and NDATA +entities when they occur in the document body (i.e. not in the DTD); and to +general and NDATA entities occurring in default attribute values declared in the +internal subset of the DTD. +

+

+Parameter entities are out of discussion for the stand-alone property. If there +is a parameter entity reference in the internal subset which was declared in an +external entity, it is not available in the same way as the external entity is +not available that contains its declaration. Because of this "equivalence", +parameter entity references are not checked on violations against the +stand-alone declaration. It simply does not matter. - Illustration: +

+ +

+Main document: + + +%ext; +%ent; +]]> + +"ext" contains: + + "> +]]> +

+ +

Here, the reference %ent; would be illegal if the standalone +declaration is strictly interpreted. This parser handles the references +%ent; and %ext; equivalently which means that %ent; is allowed, but the +element type "el" is treated as externally declared. +

+ +

+General entities can occur within the DTD, but they can only be contained in +the default value of attributes, or in the definition of other general +entities. The latter can be ignored, because the check will be repeated when +the entities are expanded. Though, general entities occurring in default +attribute values are actually checked at the moment when the default is +used in an element instance. +

+

+General entities occurring in the document body are always checked.

+

+NDATA entities can occur in ENTITY attribute values; either in the element +instance or in the default declaration. Both cases are checked. +

+
+ +
diff --git a/helm/DEVEL/pxp/pxp/doc/design.txt b/helm/DEVEL/pxp/pxp/doc/design.txt new file mode 100644 index 000000000..bf75d0618 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/design.txt @@ -0,0 +1,340 @@ +------------------------------------------------ -*- indented-text -*- +Some Notes About the Design: +---------------------------------------------------------------------- + +---------------------------------------------------------------------- +Compilation +---------------------------------------------------------------------- + +Compilation is non-trivial because: + + - The lexer and parser generators ocamlllex resp. ocamlyacc normally + create code such that the parser module precedes the lexer module. + THIS design requires that the lexer layer precedes the entity layer + which precedes the parser layer, because the parsing results modify + the behaviour of the lexer and entity layers. There is no way to get + around this because of the nature of XML. + + So the dependency relation of the lexer and the parser is modified; + in particular the "token" type that is normally defined by the + generated parser is moved to a common prdecessor of both lexer + and parser. + + - Another modification of the standard way of handling parsers is that + the parser is turned into an object. This is necessary because the + whole parser is polymorphic, i.e. there is a type parameter (the + type of the node extension). + +...................................................................... 
+ +First some modules are generated as illustrated by the following +diagram: + + + markup_yacc.mly + | | + \|/ \|/ [ocamlyacc, 1] + V V + markup_yacc.mli markup_yacc.ml + | --> renamed into markup_yacc.ml0 + [awk, 2] \|/ | + V \|/ [sed, 3] + markup_yacc_token.mlf V + | | markup_yacc.ml + markup_lexer_types_ | | + shadow.mli | | | markup_lexer_types_ + \|/ [sed, \|/ | shadow.ml + V 4] V | | + markup_lexer_types.mli | | [sed, 4] + \|/ \|/ + V V + markup_lexer_types.ml + + + markup_yacc_shadow.mli + | + \|/ [replaces, 5] + V + markup_yacc.mli + + + + markup_lexers.mll + | + \|/ [ocamllex, 6] + V + markup_lexers.ml + + +Notes: + + (1) ocamlyacc generates both a module and a module interface. + The module is postprocessed in step (3). The interface cannot + be used, but it contains the definition of the "token" type. + This definition is extracted in step (2). The interface is + completely replaced in step (5) by a different file. + + (2) An "awk" script extracts the definition of the type "token". + "token" is created by ocamlyacc upon the %token directives + in markup_yacc.mly, and normally "token" is defined in + the module generated by ocamlyacc. This turned out not to be + useful as the module dependency must be that the lexer is + an antecedent of the parser and not vice versa (as usually), + so the "token" type is "moved" to the module Markup_lexer_types + which is an antecedent of both the lexer and the parser. + + (3) A "sed" script turns the generated parser into an object. + This is rather simple; some "let" definitions must be rewritten + as "val" definitions, the other "let" definitions as + "method" definitions. The parser object is needed because + the whole parser has a polymorphic type parameter. + + (4) The implementation and definition of Markup_lexer_types are + both generated by inserting the "token" type definition + (in markup_lexer_types.mlf) into two pattern files, + markup_lexer_types_shadow.ml resp. -.mli. 
The point of insertion + is marked by the string INCLUDE_HERE. + + (5) The generated interface of the Markup_yacc module is replaced + by a hand-written file. + + (6) ocamllex generates the lexer; this process is not patched in any + way. + +...................................................................... + +After the additional modules have been generated, compilation proceeds +in the usual manner. + + +---------------------------------------------------------------------- +Hierarchy of parsing layers: +---------------------------------------------------------------------- + +From top to bottom: + + - Parser: Markup_yacc + + gets input stream from the main entity object + + checks most of the grammar + + creates the DTD object as side-effect + + creates the element tree as side-effect + + creates further entity objects that are entered into the DTD + - Entity layer: Markup_entity + + gets input stream from the lexers, or another entity object + + handles entity references: if a reference is encountered the + input stream is redirected such that the tokens come from the + referenced entity object + + handles conditional sections + - Lexer layer: Markup_lexers + + gets input from lexbuffers created by resolvers + + different lexers for different lexical contexts + + a lexer returns pairs (token,lexid), where token is the scanned + token, and lexid is the name of the lexer that must be used for + the next token + - Resolver layer: Markup_entity + + a resolver creates the lexbuf from some character source + + a resolver recodes the input and handles the encoding scheme + +---------------------------------------------------------------------- +The YACC based parser +---------------------------------------------------------------------- + +ocamlyacc allows it to pass an arbitrary 'next_token' function to the +parsing functions. We always use 'en # next_token()' where 'en' is the +main entity object representing the main file to be parsed. 
+ +The parser is not functional, but uses mainly side-effects to accumulate +the structures that have been recognized. This is very important for the +entity definitions, because once an entity definition has been found there +may be a reference to it which is handled by the entity layer (which is +below the yacc layer). This means that such a definition modifies the +token source of the parser, and this can only be handled by side-effects +(at least in a sensible manner; a purely functional parser would have to +pass unresolved entity references to its caller, which would have to +resolve the reference and to re-parse the whole document!). + +Note that also element definitions profit from the imperative style of +the parser; an element instance can be validated directly once the end +tag has been read in. + +---------------------------------------------------------------------- +The entity layer +---------------------------------------------------------------------- + +The parser gets the tokens from the main entity object. This object +controls the underlying lexing mechanism (see below), and already +interprets the following: + +- Conditional sections (if they are allowed in this entity): + The structures and are + recognized and interpreted. + + This would be hard to realize by the yacc parser, because: + - INCLUDE and IGNORE are not recognized as lexical keywords but as names. + This means that the parser cannot select different rules for them. + - The text after IGNORE requires a different lexical handling. + +- Entity references: &name; and %name; + The named entity is looked up and the input source is redirected to it, i.e. + if the main entity object gets the message 'next_token' this message is + forwarded to the referenced entity. (This entity may choose to forward the + message again to a third entity, and so on.) 
+ + There are some fine points: + + - It is okay that redirection happens at token level, not at character level: + + General entities must always match the 'content' production, and because + of this they must always consist of a whole number of tokens. + + If parameter entities are resolved, the XML specification states that + a space character is inserted before and after the replacement text. + This also means that such entities always consists of a whole number + of tokens. + + - There are some "nesting constraints": + + General entities must match the 'content' production. Because of this, + the special token Begin_entity is inserted before the first token of + the entity, and End_entity is inserted just before the Eof token. The + brace Begin_entity...End_entity is recognized by the yacc parser, but + only in the 'content' production. + + External parameter entities must match 'extSubsetDecl'. Again, + Begin_entity and End_entity tokens embrace the inner token stream. + The brace Begin_entity...End_entity is recognized by the yacc parser + at the appropriate position. + (As general and parameter entities are used in different contexts + (document vs. DTD), both kinds of entities can use the same brace + Begin_entity...End_entity.) + + TODO: + The constraints for internal parameter entities are not yet checked. + + - Recursive references can be detected because entities must be opened + before the 'next_token' method can be invoked. + +---------------------------------------------------------------------- +The lexer layer +---------------------------------------------------------------------- + +There are five main lexers, and a number of auxiliary lexers. The five +main lexers are: + +- Document (function scan_document): + Scans an XML document outside the DTD and outside the element instance. + +- Content (function scan_content): + Scans an element instance, but not within tags. + +- Within_tag (function scan_within_tag): + Scans within <...>, i.e. 
a tag denoting an element instance. + +- Document_type (function scan_document_type): + Scans after . + +- Declaration (function scan_declaration): + Scans sequences of declarations + +Why several lexers? Because there are different lexical rules in these +five regions of an XML document. + +Every lexer not only produces tokens, but also the name of the next lexer +to use. For example, if the Document lexer scans " + ]> + ∅ + - This is illegal, and the presence of an empty Begin_entity/End_entity pair + helps to recognize this. diff --git a/helm/DEVEL/pxp/pxp/doc/dist-common.xml b/helm/DEVEL/pxp/pxp/doc/dist-common.xml new file mode 100644 index 000000000..d18a1500f --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/dist-common.xml @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Gerd Stolpmann'> + + + diff --git a/helm/DEVEL/pxp/pxp/doc/manual/Makefile b/helm/DEVEL/pxp/pxp/doc/manual/Makefile new file mode 100644 index 000000000..5a3e1ffab --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/Makefile @@ -0,0 +1,82 @@ +DOCBOOK_HTML = /usr/share/sgml/docbkdsl/html +DOCBOOK_PRINT = /usr/share/sgml/docbkdsl/print +SRC = $(PWD)/src + +.PHONY: html ps + +default: html ps + +html: html/book1.htm html/pic/done + +ps: ps/markup.ps ps/pic/done + + +src/readme.ent: ../../examples/readme/to_html.ml + src/getcode.ml <../../examples/readme/to_html.ml >src/readme.ent + +src/yacc.mli.ent: ../../pxp_yacc.mli + src/getcode.ml <../../pxp_yacc.mli >src/yacc.mli.ent + +src/dtd.mli.ent: ../../pxp_dtd.mli + src/getcode.ml <../../pxp_dtd.mli >src/dtd.mli.ent + +html/book1.htm: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent + mkdir -p html + cp src/markup.css html; \ + cd html; \ + rm -f *.htm*; \ + jade -t sgml -D$(DOCBOOK_HTML) -D$(SRC) -ihtml markup.sgml; \ + true + touch html/TIMESTAMP + +html/pic/done: src/pic/*.fig + mkdir -p html/pic + 
l=`cd src/pic; echo *.fig`; \ + for x in $$l; do fig2dev -L gif src/pic/$$x html/pic/`basename $$x .fig`.gif; done + touch html/pic/done + +#man: src/findlib_reference.xml +# mkdir -p man +# cd man; \ +# rm -f *.[0-9]; \ +# db2man <../src/findlib_reference.xml + +ps/markup.tex: src/*.sgml src/readme.ent src/yacc.mli.ent src/dtd.mli.ent + mkdir -p ps + cd ps; \ + jade -t tex -D$(DOCBOOK_PRINT) -D$(SRC) markup.sgml; \ + true + +ps/markup.dvi: ps/markup.tex ps/pic/done + cd ps; \ + jadetex markup.tex; \ + jadetex markup.tex; \ + jadetex markup.tex + +ps/markup.ps: ps/markup.dvi + cd ps; \ + dvips -f markup.ps + +ps/pic/done: src/pic/*.fig + mkdir -p ps/pic + l=`cd src/pic; echo *.fig`; \ + for x in $$l; do fig2dev -L ps -m 0.8 src/pic/$$x ps/pic/`basename $$x .fig`.ps; done + touch ps/pic/done + +.SUFFIXES: .xml .sgml + +.sgml.xml: + sx -xndata $< >$@; true + + + +clean: + rm -rf html man ps + rm -f src/readme.ent + +CLEAN: clean + +distclean: + rm -f src/*~ + rm -f *~ + rm -f ps/*.aux ps/*.dvi ps/*.log ps/*.tex diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP b/helm/DEVEL/pxp/pxp/doc/manual/html/TIMESTAMP new file mode 100644 index 000000000..e69de29bb diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html new file mode 100644 index 000000000..ab88e87bf --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c1567.html @@ -0,0 +1,434 @@ +Configuring and calling the parser
The PXP user's guide
PrevNext

Chapter 4. Configuring and calling the parser

4.1. Overview

There are the following main functions invoking the parser (in Pxp_yacc): + +

  • parse_document_entity: You want to +parse a complete and closed document consisting of a DTD and the document body; +the body is validated against the DTD. This mode is interesting if you have a +file + +

    <!DOCTYPE root ... [ ... ] > <root> ... </root>
    + +and you can accept any DTD that is included in the file (e.g. because the file +is under your control).

  • parse_wfdocument_entity: You want to +parse a complete and closed document consisting of a DTD and the document body; +but the body is not validated, only checked for well-formedness. This mode is +preferred if validation costs too much time or if the DTD is missing.

  • parse_dtd_entity: You want only to +parse an entity (file) containing the external subset of a DTD. Sometimes it is +interesting to read such a DTD, for example to compare it with the DTD included +in a document, or to apply the next mode:

  • parse_content_entity: You want only to +parse an entity (file) containing a fragment of a document body; this fragment +is validated against the DTD you pass to the function. Especially, the fragment +must not have a <!DOCTYPE> clause, and must directly +begin with an element. The element is validated against the DTD. This mode is +interesting if you want to check documents against a fixed, immutable DTD.

  • parse_wfcontent_entity: This function +also parses a single element without DTD, but does not validate it.

  • extract_dtd_from_document_entity: This +function extracts the DTD from a closed document consisting of a DTD and a +document body. Both the internal and the external subsets are extracted.

In many cases, parse_document_entity is the preferred mode +to parse a document in a validating way, and +parse_wfdocument_entity is the mode of choice to parse a +file while only checking for well-formedness.

There are a number of variations of these modes. One important application of a +parser is to check documents of an untrusted source against a fixed DTD. One +solution is to not allow the <!DOCTYPE> clause in +these documents, and treat the document like a fragment (using mode +parse_content_entity). This is very simple, but +inflexible; users of such a system cannot even define additional entities to +abbreviate frequent phrases of their text.

It may be necessary to have a more intelligent checker. For example, it is also +possible to parse the document to check fully, i.e. with DTD, and to compare +this DTD with the prescribed one. In order to fully parse the document, mode +parse_document_entity is applied, and to get the DTD to +compare with, mode parse_dtd_entity can be used.

There is another very important configurable aspect of the parser: the +so-called resolver. The task of the resolver is to locate the contents of an +(external) entity for a given entity name, and to make the contents accessible +as a character stream. (Furthermore, it also normalizes the character set; +but this is a detail we can ignore here.) Consider you have a file called +"main.xml" containing + +

<!ENTITY % sub SYSTEM "sub/sub.xml">
+%sub;
+ +and a file stored in the subdirectory "sub" with name +"sub.xml" containing + +
<!ENTITY % subsub SYSTEM "subsub/subsub.xml">
+%subsub;
+ +and a file stored in the subdirectory "subsub" of +"sub" with name "subsub.xml" (the +contents of this file do not matter). Here, the resolver must track that +the second entity subsub is located in the directory +"sub/subsub", i.e. the difficulty is to interpret the +system (file) names of entities relative to the entities containing them, +even if the entities are deeply nested.

There is not a fixed resolver already doing everything right - resolving entity +names is a task that highly depends on the environment. The XML specification +only demands that SYSTEM entities are interpreted like URLs +(which is not very precise, as there are lots of URL schemes in use), hoping +that this helps overcoming the local peculiarities of the environment; the idea +is that if you do not know your environment you can refer to other entities by +denoting URLs for them. I think that this interpretation of +SYSTEM names may have some applications in the internet, but +it is not the first choice in general. Because of this, the resolver is a +separate module of the parser that can be exchanged by another one if +necessary; more precisely, the parser already defines several resolvers.

The following resolvers do already exist: + +

  • Resolvers reading from arbitrary input channels. These +can be configured such that a certain ID is associated with the channel; in +this case inner references to external entities can be resolved. There is also +a special resolver that interprets SYSTEM IDs as URLs; this resolver can +process relative SYSTEM names and determine the corresponding absolute URL.

  • A resolver that always reads from a given O'Caml +string. This resolver is not able to resolve further names if the string is +not associated with any name, i.e. if the document contained in the string +refers to an external entity, this reference cannot be followed in this +case.

  • A resolver for file names. The SYSTEM +name is interpreted as file URL with the slash "/" as separator for +directories. - This resolver is derived from the generic URL resolver.

+ +The interface a resolver must have is documented, so it is possible to write +your own resolver. For example, you could connect the parser with an HTTP +client, and resolve URLs of the HTTP namespace. The resolver classes support +that several independent resolvers are combined to one more powerful resolver; +thus it is possible to combine a self-written resolver with the already +existing resolvers.

Note that the existing resolvers only interpret SYSTEM +names, not PUBLIC names. If it helps you, it is possible to +define resolvers for PUBLIC names, too; for example, such a +resolver could look up the public name in a hash table, and map it to a system +name which is passed over to the existing resolver for system names. It is +relatively simple to provide such a resolver.


PrevHomeNext
Details of the mapping from XML text to the tree representationUpResolvers and sources
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html new file mode 100644 index 000000000..d74ecbbca --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c36.html @@ -0,0 +1,533 @@ +What is XML?
The PXP user's guide
PrevNext

Chapter 1. What is XML?

1.1. Introduction

XML (short for Extensible Markup Language) +generalizes the idea that text documents are typically structured in sections, +sub-sections, paragraphs, and so on. The format of the document is not fixed +(as, for example, in HTML), but can be declared by a so-called DTD (document +type definition). The DTD describes only the rules how the document can be +structured, but not how the document can be processed. For example, if you want +to publish a book that uses XML markup, you will need a processor that converts +the XML file into a printable format such as Postscript. On the one hand, the +structure of XML documents is configurable; on the other hand, there is no +longer a canonical interpretation of the elements of the document; for example +one XML DTD might want that paragraphs are delimited by +para tags, and another DTD expects p tags +for the same purpose. As a result, for every DTD a new processor is required.

Although XML can be used to express structured text documents it is not limited +to this kind of application. For example, XML can also be used to exchange +structured data over a network, or to simply store structured data in +files. Note that XML documents cannot contain arbitrary binary data because +some characters are forbidden; for some applications you need to encode binary +data as text (e.g. the base 64 encoding).

1.1.1. The "hello world" example

The following example shows a very simple DTD, and a corresponding document +instance. The document is structured such that it consists of sections, and +that sections consist of paragraphs, and that paragraphs contain plain text:

<!ELEMENT document (section)+>
+<!ELEMENT section (paragraph)+>
+<!ELEMENT paragraph (#PCDATA)>

The following document is an instance of this DTD:

<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE document SYSTEM "simple.dtd">
+<document>
+  <section>
+    <paragraph>This is a paragraph of the first section.</paragraph>
+    <paragraph>This is another paragraph of the first section.</paragraph>
+  </section>
+  <section>
+    <paragraph>This is the only paragraph of the second section.</paragraph>
+  </section>
+</document>

As in HTML (and, of course, in grand-father SGML), the "pieces" of +the document are delimited by element braces, i.e. such a piece begins with +<name-of-the-type-of-the-piece> and ends with +</name-of-the-type-of-the-piece>, and the pieces are +called elements. Unlike HTML and SGML, both start tags and +end tags (i.e. the delimiters written in angle brackets) can never be left +out. For example, HTML calls the paragraphs simply p, and +because paragraphs never contain paragraphs, a sequence of several paragraphs +can be written as: + +

<p>First paragraph 
+<p>Second paragraph
+ +This is not possible in XML; continuing our example above we must always write + +
<paragraph>First paragraph</paragraph>
+<paragraph>Second paragraph</paragraph>
+ +The rationale behind that is to (1) simplify the development of XML parsers +(you need not convert the DTD into a deterministic finite automaton which is +required to detect omitted tags), and to (2) make it possible to parse the +document independent of whether the DTD is known or not.

The first line of our sample document, + +

<?xml version="1.0" encoding="ISO-8859-1"?>
+ +is the so-called XML declaration. It expresses that the +document follows the conventions of XML version 1.0, and that the document is +encoded using characters from the ISO-8859-1 character set (often known as +"Latin 1", mostly used in Western Europe). Although the XML declaration is not +mandatory, it is good style to include it; everybody sees at the first glance +that the document uses XML markup and not the similar-looking HTML and SGML +markup languages. If you omit the XML declaration, the parser will assume +that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes +it possible to distinguish between UTF-8 and UTF-16 automatically); these +are encodings of Unicode's universal character set. (Note that PXP, unlike its +predecessor "Markup", fully supports Unicode.)

The second line, + +

<!DOCTYPE document SYSTEM "simple.dtd">
+ +names the DTD that is going to be used for the rest of the document. In +general, it is possible that the DTD consists of two parts, the so-called +external and the internal subset. "External" means that the DTD exists as a +second file; "internal" means that the DTD is included in the same file. In +this example, there is only an external subset, and the system identifier +"simple.dtd" specifies where the DTD file can be found. System identifiers are +interpreted as URLs; for instance this would be legal: + +
<!DOCTYPE document SYSTEM "http://host/location/simple.dtd">
+ +Please note that PXP cannot interpret HTTP identifiers by default, but it is +possible to change the interpretation of system identifiers.

The word immediately following DOCTYPE determines which of +the declared element types (here "document", "section", and "paragraph") is +used for the outermost element, the root element. In this +example it is document because the outermost element is +delimited by <document> and +</document>.

The DTD consists of three declarations for element types: +document, section, and +paragraph. Such a declaration has two parts: + +

<!ELEMENT name content-model>
+ +The content model is a regular expression which describes the possible inner +structure of the element. Here, document contains one or +more sections, and a section contains one or more +paragraphs. Note that these two element types are not allowed to contain +arbitrary text. Only the paragraph element type is declared +such that parsed character data (indicated by the symbol +#PCDATA) is permitted.

See below for a detailed discussion of content models.

1.1.2. XML parsers and processors

XML documents are human-readable, but this is not the main purpose of this +language. XML has been designed such that documents can be read by a program +called an XML parser. The parser checks that the document +is well-formatted, and it represents the document as objects of the programming +language. There are two aspects when checking the document: First, the document +must follow some basic syntactic rules, such as that tags are written in angle +brackets, that for every start tag there must be a corresponding end tag and so +on. A document respecting these rules is +well-formed. Second, the document must match the DTD in +which case the document is valid. Many parsers check only +on well-formedness and ignore the DTD; PXP is designed such that it can +even validate the document.

A parser by itself does not make a sensible application; it only reads XML +documents. The whole application working with XML-formatted data is called an +XML processor. Often XML processors convert documents into +another format, such as HTML or Postscript. Sometimes processors extract data +from the documents and output the processed data again XML-formatted. The parser +can help the application processing the document; for example it can provide +means to access the document in a specific manner. In particular, PXP supports an +object-oriented access layer.

1.1.3. Discussion

As we have seen, there are two levels of description: On the one hand, XML can +define rules about the format of a document (the DTD), on the other hand, XML +expresses structured documents. There are a number of possible applications:

  • XML can be used to express structured texts. Unlike HTML, there is no canonical +interpretation; one would have to write a backend for the DTD that translates +the structured texts into a format that existing browsers, printers +etc. understand. The advantage of a self-defined document format is that it is +possible to design the format in a more problem-oriented way. For example, if +the task is to extract reports from a database, one can use a DTD that reflects +the structure of the report or the database. A possible approach would be to +have an element type for every database table and for every column. Once the +DTD has been designed, the report procedure can be split up into a part that +selects the database rows and outputs them as an XML document according to the +DTD, and a part that translates the document into other formats. Of course, +the latter part can be solved in a generic way, e.g. there may be configurable +backends for all DTDs that follow the approach and have element types for +tables and columns.

    XML plays the role of a configurable intermediate format. The database +extraction function can be written without having to know the details of +typesetting; the backends can be written without having to know the details of +the database.

    Of course, there are traditional solutions. One can define an ad hoc +intermediate text file format. The disadvantage is that there are no names for +the pieces of the format, and that such formats usually lack documentation +because of this. Another solution would be to have a binary representation, +either as language-dependent or language-independent structure (examples of the +latter can be found in RPC implementations). The disadvantage is that it is +harder to view such representations; one has to write pretty printers for this +purpose. It is also more difficult to enter test data; XML is plain text that +can be written using an arbitrary editor (Emacs even has a good XML mode, +PSGML). All these alternatives suffer from a missing structure checker, +i.e. the programs processing these formats usually do not check the input file +or input object in detail; XML parsers check the syntax of the input (the +so-called well-formedness check), and the advanced parsers like PXP even +verify that the structure matches the DTD (the so-called validation).

  • XML can be used as configurable communication language. A fundamental problem +of every communication is that sender and receiver must follow the same +conventions about the language. For data exchange, the question is usually +which data records and fields are available, how they are syntactically +composed, and which values are possible for the various fields. Similar +questions arise for text document exchange. XML does not answer these problems +completely, but it reduces the number of ambiguities for such conventions: The +outlines of the syntax are specified by the DTD (but not necessarily the +details), and XML introduces canonical names for the components of documents +such that it is simpler to describe the rest of the syntax and the semantics +informally.

  • XML is a data storage format. Currently, every software product tends to use +its own way to store data; commercial software often does not describe such +formats, and it is a pain to integrate such software into a bigger project. +XML can help to improve this situation when several applications share the same +syntax of data files. DTDs are then neutral instances that check the format of +data files independent of applications.


PrevHomeNext
User's guideUpHighlights of XML
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html new file mode 100644 index 000000000..c58e6ff3e --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c533.html @@ -0,0 +1,234 @@ +Using PXP
The PXP user's guide
PrevNext

Chapter 2. Using PXP

2.1. Validation

The parser can be used to validate a document. This means +that all the constraints that must hold for a valid document are actually +checked. Validation is the default mode of PXP, i.e. every document is +validated while it is being parsed.

In the examples directory of the distribution you find the +pxpvalidate application. It is invoked in the following way: + +

pxpvalidate [ -wf ] file...
+ +The files mentioned on the command line are validated, and every warning and +every error message is printed to stderr.

The -wf switch modifies the behaviour such that a well-formedness parser is +simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the +DTD are ignored, and only the ENTITY declarations will take effect. This mode +is intended for documents lacking a DTD. Please note that the parser still +scans the DTD fully and will report all errors in the DTD; such checks are not +required by a well-formedness parser.

The pxpvalidate application is the simplest sensible program +using PXP, you may consider it as "hello world" program.


PrevHomeNext
A complete example: The readme DTDUpHow to parse a document from an application
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html b/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html new file mode 100644 index 000000000..0e564fb20 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/c893.html @@ -0,0 +1,349 @@ +The objects representing the document
The PXP user's guide
PrevNext

Chapter 3. The objects representing the document

This description might be out-of-date. See the module interface files +for updated information.

3.1. The document class

class [ 'ext ] document :
+  Pxp_types.collect_warnings -> 
+  object
+    method init_xml_version : string -> unit
+    method init_root : 'ext node -> unit
+
+    method xml_version : string
+    method xml_standalone : bool
+    method dtd : dtd
+    method root : 'ext node
+
+    method encoding : Pxp_types.rep_encoding
+
+    method add_pinstr : proc_instruction -> unit
+    method pinstr : string -> proc_instruction list
+    method pinstr_names : string list
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+  end
+;;
+ +The methods beginning with init_ are only for internal use +of the parser.

  • xml_version: returns the version string at the beginning of +the document. For example, "1.0" is returned if the document begins with +<?xml version="1.0"?>.

  • xml_standalone: returns the boolean value of the +standalone declaration in the XML declaration. If the +standalone attribute is missing, false is +returned.

  • dtd: returns a reference to the global DTD object.

  • root: returns a reference to the root element.

  • encoding: returns the internal encoding of the +document. This means that all strings of which the document consists are +encoded in this character set.

  • pinstr: returns the processing instructions outside the DTD +and outside the root element. The argument passed to the method names a +target, and the method returns all instructions with this +target. The target is the first word inside <? and +?>.

  • pinstr_names: returns the names of the processing instructions

  • add_pinstr: adds another processing instruction. This method +is used by the parser itself to enter the instructions returned by +pinstr, but you can also enter additional instructions.

  • write: writes the document to the passed stream as XML +text using the passed (external) encoding. The generated text is always valid +XML and can be parsed by PXP; however, the text is badly formatted (this is not +a pretty printer).


PrevHomeNext
Example: An HTML backend for the readme +DTDUpThe class type node
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/index.html b/helm/DEVEL/pxp/pxp/doc/manual/html/index.html new file mode 100644 index 000000000..3c07ff28f --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/index.html @@ -0,0 +1,330 @@ +The PXP user's guide

The PXP user's guide

Gerd Stolpmann

Copyright © 1999, 2000 by Gerd Stolpmann

PXP is a validating parser for XML-1.0 which has been +written entirely in Objective Caml.

Download PXP:

The free PXP library can be downloaded at +http://www.ocaml-programming.de/packages/. This user's guide is included. +Newest releases of PXP will be announced in +The OCaml Link +Database.

License

This document, and the described software, "PXP", are copyright by +Gerd Stolpmann.

Permission is hereby granted, free of charge, to any person obtaining +a copy of this document and the "PXP" software (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions:

The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software.

The Software is provided ``as is'', without warranty of any kind, express +or implied, including but not limited to the warranties of +merchantability, fitness for a particular purpose and noninfringement. +In no event shall Gerd Stolpmann be liable for any claim, damages or +other liability, whether in an action of contract, tort or otherwise, +arising from, out of or in connection with the Software or the use or +other dealings in the software.



  Next
  User's guide
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css new file mode 100644 index 000000000..67dfaecb7 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/markup.css @@ -0,0 +1,4 @@ +.acronym { + font-weight: bold; + color: #c71585 +} diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html b/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html new file mode 100644 index 000000000..9db427d34 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/p34.html @@ -0,0 +1,167 @@ +User's guide
The PXP user's guide
PrevNext


PrevHomeNext
The PXP user's guide What is XML?
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/done new file mode 100644 index 000000000..e69de29bb diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif new file mode 100644 index 000000000..6cc260a4e Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/extension_general.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif new file mode 100644 index 000000000..0091db2a2 Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_add.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif new file mode 100644 index 000000000..97cd3639e Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_clone.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif new file mode 100644 index 000000000..d521123a7 Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_delete.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif new file mode 100644 index 000000000..5f6358cc3 Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_general.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif new file mode 100644 index 000000000..5644c91f3 Binary files /dev/null and b/helm/DEVEL/pxp/pxp/doc/manual/html/pic/node_term.gif differ diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html new file mode 100644 index 000000000..102aba218 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x107.html @@ -0,0 +1,1694 
@@ +Highlights of XML
The PXP user's guide
PrevChapter 1. What is XML?Next

1.2. Highlights of XML

This section explains many of the features of XML, but not all, and some +features not in detail. For a complete description, see the XML +specification.

1.2.1. The DTD and the instance

The DTD contains various declarations; in general you can only use a feature if +you have previously declared it. The document instance file may contain the +full DTD, but it is also possible to split the DTD into an internal and an +external subset. A document must begin as follows if the full DTD is included: + +

<?xml version="1.0" encoding="Your encoding"?>
+<!DOCTYPE root [
+  Declarations
+]>
+ +These declarations are called the internal subset. Note +that the usage of entities and conditional sections is restricted within the +internal subset.

If the declarations are located in a different file, you can refer to this file +as follows: + +

<?xml version="1.0" encoding="Your encoding"?>
+<!DOCTYPE root SYSTEM "file name">
+ +The declarations in the file are called the external +subset. The file name is called the system +identifier. +It is also possible to refer to the file by a so-called +public identifier, but most XML applications won't use +this feature.

You can also specify both internal and external subsets. In this case, the +declarations of both subsets are mixed, and if there are conflicts, the +declaration of the internal subset overrides those of the external subset with +the same name. This looks as follows: + +

<?xml version="1.0" encoding="Your encoding"?>
+<!DOCTYPE root  SYSTEM "file name" [
+  Declarations
+]>

The XML declaration (the string beginning with <?xml and +ending at ?>) should specify the encoding of the +file. Common values are UTF-8, and the ISO-8859 series of character sets. Note +that every file parsed by the XML processor can begin with an XML declaration +and that every file may have its own encoding.

The name of the root element must be mentioned directly after the +DOCTYPE string. This means that a full document instance +looks like + +

<?xml version="1.0" encoding="Your encoding"?>
+<!DOCTYPE root  SYSTEM "file name" [
+  Declarations
+]>
+
+<root>
+  inner contents
+</root>

1.2.2. Reserved characters

Some characters are generally reserved to indicate markup such that they cannot +be used for character data. These characters are <, >, and +&. Furthermore, single and double quotes are sometimes reserved. If you +want to include such a character as character, write it as follows: + +

  • &lt; instead of <

  • &gt; instead of >

  • &amp; instead of &

  • &apos; instead of '

  • &quot; instead of "

+ +All other characters are free in the document instance. It is possible to +include a character by its position in the Unicode alphabet: + +
&#n;
+ +where n is the decimal number of the +character. Alternatively, you can specify the character by its hexadecimal +number: + +
&#xn;
+ +In the scope of declarations, the character % is no longer free. To include it +as character, you must use the notations &#37; or +&#x25;.

Note that besides &lt;, &gt;, &amp;, +&apos;, and &quot; there are no predefined character entities. This is +different from HTML which defines a list of characters that can be referenced +by name (e.g. &auml; for ä); however, if you prefer named characters, you +can declare such entities yourself (see below).

1.2.3. Elements and ELEMENT declarations

Elements structure the document instance in a hierarchical way. There is a +top-level element, the root element, which contains a +sequence of inner elements and character sections. The inner elements are +structured in the same way. Every element has an element +type. The beginning of the element is indicated by a start +tag, written + +

<element-type>
+ +and the element continues until the corresponding end tag +is reached: + +
</element-type>
+ +In XML, it is not allowed to omit start or end tags, even if the DTD would +permit this. Note that there are no special rules how to interpret spaces or +newlines near start or end tags; all spaces and newlines count.

Every element type must be declared before it can be used. The declaration +consists of two parts: the ELEMENT declaration describes the content model, +i.e. which inner elements are allowed; the ATTLIST declaration describes the +attributes of the element.

An element can simply allow everything as content. This is written: + +

<!ELEMENT name ANY>
+ +On the opposite, an element can be forced to be empty; declared by: + +
<!ELEMENT name EMPTY>
+ +Note that there is an abbreviated notation for empty element instances: +<name/>.

There are two more sophisticated forms of declarations: so-called +mixed declarations, and regular +expressions. An element with mixed content contains character data +interspersed with inner elements, and the set of allowed inner elements can be +specified. In contrast to this, a regular expression declaration does not allow +character data, but the inner elements can be described by the more powerful +means of regular expressions.

A declaration for mixed content looks as follows: + +

<!ELEMENT name (#PCDATA | element1 | ... | elementn )*>
+ +or if you do not want to allow any inner element, simply + +
<!ELEMENT name (#PCDATA)>

Example

If element type q is declared as + +

<!ELEMENT q (#PCDATA | r | s)*>
+ +this is a legal instance: + +
<q>This is character data<r></r>with <s></s>inner elements</q>
+ +But this is illegal because t has not been enumerated in the +declaration: + +
<q>This is character data<r></r>with <t></t>inner elements</q>

The other form uses a regular expression to describe the possible contents: + +

<!ELEMENT name regexp>
+ +The following well-known regexp operators are allowed: + +

  • element-name

  • (subexpr1 , ... , subexprn )

  • (subexpr1 | ... | subexprn )

  • subexpr*

  • subexpr+

  • subexpr?

+ +The , operator indicates a sequence of sub-models, the +| operator describes alternative sub-models. The +* indicates zero or more repetitions, and ++ one or more repetitions. Finally, ? can +be used for optional sub-models. As atoms the regexp can contain names of +elements; note that it is not allowed to include #PCDATA.

The exact syntax of the regular expressions is rather strange. This can be +explained best by a list of constraints: + +

  • The outermost expression must not be +element-name.

    Illegal: +<!ELEMENT x y>; this must be written as +<!ELEMENT x (y)>.

  • For the unary operators subexpr*, +subexpr+, and +subexpr?, the +subexpr must not itself be a +unary operator.

    Illegal: +<!ELEMENT x y**>; this must be written as +<!ELEMENT x (y*)*>.

  • Between ) and one of the unary operators +*, +, or ?, there must +not be whitespace.

    Illegal: +<!ELEMENT x (y|z) *>; this must be written as +<!ELEMENT x (y|z)*>.

  • There is the additional constraint that the +right parenthesis must be contained in the same entity as the left parenthesis; +see the section about parsed entities below.

Note that there is another restriction on regular expressions: they must be +deterministic. This means that the parser must be able to see by looking at the +next token which alternative is actually used, or whether the repetition +stops. The reason for this is simply compatibility with SGML (there is no +intrinsic reason for this rule; XML can live without this restriction).

Example

The elements are declared as follows: + +

<!ELEMENT q (r?, (s | t)+)>
+<!ELEMENT r (#PCDATA)>
+<!ELEMENT s EMPTY>
+<!ELEMENT t (q | r)>
+ +This is a legal instance: + +
<q><r>Some characters</r><s/></q>
+ +(Note: <s/> is an abbreviation for +<s></s>.) + +It would be illegal to leave <s/> out because at +least one instance of s or t must be +present. It would be illegal, too, if characters existed outside the +r element; the only exception is white space. -- This is +legal, too: + +
<q><s/><t><q><s/></q></t></q>

1.2.4. Attribute lists and ATTLIST declarations

Elements may have attributes. These are put into the start tag of an element as +follows: + +

<element-name attribute1="value1" ... attributen="valuen">
+ +Instead of +"valuek" +it is also possible to use single quotes as in +'valuek'. +Note that you cannot use double quotes literally within the value of the +attribute if double quotes are the delimiters; the same applies to single +quotes. You can generally not use < and & as characters in attribute +values. It is possible to include the paraphrases &lt;, &gt;, +&amp;, &apos;, and &quot; (and any other reference to a general +entity as long as the entity is not defined by an external file) as well as +&#n;.

Before you can use an attribute you must declare it. An ATTLIST declaration +looks as follows: + +

<!ATTLIST element-name 
+          attribute-name attribute-type attribute-default
+          ...
+          attribute-name attribute-type attribute-default
+>
+ +There are a lot of types, but most important are: + +

  • CDATA: Every string is allowed as attribute value.

  • NMTOKEN: Every nametoken is allowed as attribute +value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary +order.

  • NMTOKENS: A space-separated list of nametokens is allowed as +attribute value.

+ +The most interesting default declarations are: + +

  • #REQUIRED: The attribute must be specified.

  • #IMPLIED: The attribute can be specified but also can be +left out. The application can find out whether the attribute was present or +not.

  • "value" or +'value': This particular value is +used as default if the attribute is omitted in the element.

Example

This is a valid attribute declaration for element type r: + +

<!ATTLIST r 
+          x CDATA    #REQUIRED
+          y NMTOKEN  #IMPLIED
+          z NMTOKENS "one two three">
+ +This means that x is a required attribute that cannot be +left out, while y and z are optional. The +XML parser indicates to the application whether y is present or +not, but if z is missing the default value +"one two three" is returned automatically.

This is a valid example of these attributes: + +

<r x="He said: &quot;I don't like quotes!&quot;" y='1'>

1.2.5. Parsed entities

Elements describe the logical structure of the document, while +entities determine the physical structure. Entities are +the pieces of text the parser operates on, mostly files and macros. Entities +may be parsed in which case the parser reads the text and +interprets it as XML markup, or unparsed which simply +means that the data of the entity has a foreign format (e.g. a GIF icon).

If the parsed entity is going to be used as part of the DTD, it +is called a parameter entity. You can declare a parameter +entity with a fixed text as content by: + +

<!ENTITY % name "value">
+ +Within the DTD, you can refer to this entity, i.e. read +the text of the entity, by: + +
%name;
+ +Such entities behave like macros, i.e. when they are referred to, the +macro text is inserted and read instead of the original text. + +

Example

For example, you can declare two elements with the same content model by: + +

<!ENTITY % model "a | b | c">
+<!ELEMENT x (%model;)>
+<!ELEMENT y (%model;)>

+ +If the contents of the entity are given as string constant, the entity is +called an internal entity. It is also possible to name a +file to be used as content (an external entity): + +
<!ENTITY % name SYSTEM "file name">
+ +There are some restrictions for parameter entities: + +

  • If the internal parameter entity contains the first token of a declaration +(i.e. <!), it must also contain the last token of the +declaration, i.e. the >. This means that the entity +either contains a whole number of complete declarations, or some text from the +middle of one declaration.

    Illegal: +

    <!ENTITY % e "(a | b | c)>">
    +<!ELEMENT x %e;
    Because <! is contained in the main +entity, and the corresponding > is contained in the +entity e.

  • If the internal parameter entity contains a left parenthesis, it must also +contain the corresponding right parenthesis.

    Illegal: +

    <!ENTITY % e "(a | b | c">
    +<!ELEMENT x %e;)>
    Because ( is contained in the entity +e, and the corresponding ) is +contained in the main entity.

  • When reading text from an entity, the parser automatically inserts one space +character before the entity text and one space character after the entity +text. However, this rule is not applied within the definition of another +entity.

    Legal: +

     
    +<!ENTITY % suffix "gif"> 
    +<!ENTITY iconfile 'icon.%suffix;'>
    Because %suffix; is referenced within +the definition text for iconfile, no additional spaces are +added.

    Illegal: +

    <!ENTITY % suffix "test">
    +<!ELEMENT x.%suffix; ANY>
    +Because %suffix; is referenced outside the definition +text of another entity, the parser replaces %suffix; by +spacetestspace.

    Illegal: +

    <!ENTITY % e "(a | b | c)">
    +<!ELEMENT x %e;*>
    Because there is a whitespace between ) +and *, which is illegal.

  • An external parameter entity must always consist of a whole number of complete +declarations.

  • In the internal subset of the DTD, a reference to a parameter entity (internal +or external) is only allowed at positions where a new declaration can start.

If the parsed entity is going to be used in the document instance, it is called +a general entity. Such entities can be used as +abbreviations for frequent phrases, or to include external files. Internal +general entities are declared as follows: + +

<!ENTITY name "value">
+ +External general entities are declared this way: + +
<!ENTITY name SYSTEM "file name">
+ +References to general entities are written as: + +
&name;
+ +The main difference between parameter and general entities is that the former +are only recognized in the DTD and that the latter are only recognized in the +document instance. As the DTD is parsed before the document, the parameter +entities are expanded first; for example it is possible to use the content of a +parameter entity as the name of a general entity: +&#38;%name;;[1].

General entities must respect the element hierarchy. This means that there must +be an end tag for every start tag in the entity value, and that end tags +without corresponding start tags are not allowed.

Example

If the author of a document changes sometimes, it is worthwhile to set up a +general entity containing the names of the authors. If the author changes, you +need only to change the definition of the entity, and do not need to check all +occurrences of authors' names: + +

<!ENTITY authors "Gerd Stolpmann">
+ +In the document text, you can now refer to the author names by writing +&authors;.

Illegal: +The following two entities are illegal because the elements in the definition +do not nest properly: + +

<!ENTITY lengthy-tag "<section textcolor='white' background='graphic'>">
+<!ENTITY nonsense    "<a></b>">

Earlier in this introduction we explained that there are substitutes for +reserved characters: &lt;, &gt;, &amp;, &apos;, and +&quot;. These are simply predefined general entities; note that they are +the only predefined entities. It is allowed to define these entities again +as long as the meaning is unchanged.

1.2.6. Notations and unparsed entities

Unparsed entities have a foreign format and can thus not be read by the XML +parser. Unparsed entities are always external. The format of an unparsed entity +must have been declared, such a format is called a +notation. The entity can then be declared by referring to +this notation. As unparsed entities do not contain XML text, it is not possible +to include them directly into the document; you can only declare attributes +such that names of unparsed entities are acceptable values.

As you can see, unparsed entities are too complicated to be of much practical +use. It is almost always better to simply pass the name of the data file as +normal attribute value, and let the application recognize and process the +foreign format.

Notes

[1]

This construct is only +allowed within the definition of another entity; otherwise extra spaces would +be added (as explained above). Such indirection is not recommended.

Complete example: +

<!ENTITY % variant "a">      <!-- or "b" -->
+<!ENTITY text-a "This is text A.">
+<!ENTITY text-b "This is text B.">
+<!ENTITY text "&#38;text-%variant;;">
+You can now write &text; in the document instance, and +depending on the value of variant either +text-a or text-b is inserted.


PrevHomeNext
What is XML?UpA complete example: The readme DTD
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html new file mode 100644 index 000000000..267730574 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1439.html @@ -0,0 +1,464 @@ +The class type extension
The PXP user's guide
PrevChapter 3. The objects representing the documentNext

3.3. The class type extension

class type [ 'node ] extension =
+  object ('self)
+    method clone : 'self
+      (* "clone" should return an exact deep copy of the object. *)
+    method node : 'node
+      (* "node" returns the corresponding node of this extension. This method is
+       * intended to return exactly what previously has been set by "set_node".
+       *)
+    method set_node : 'node -> unit
+      (* "set_node" is invoked once the extension is associated to a new
+       * node object.
+       *)
+  end
+ +This is the type of classes used for node extensions. For every node of the +document tree, there is not only the node object, but also +an extension object. The latter has minimal +functionality; it has only the necessary methods to be attached to the node +object containing the details of the node instance. The extension object is +called extension because its purpose is extensibility.

For certain reasons, it is impossible to derive the +node classes (i.e. element_impl and +data_impl) such that the subclasses can be extended by +new methods. But +subclassing nodes is a great feature, because it allows the user to provide +different classes for different types of nodes. The extension objects are a +workaround that is as powerful as direct subclassing; the cost is +some notational overhead.

Figure 3-6. The structure of nodes and extensions

The picture shows how the nodes and extensions are linked +together. Every node has a reference to its extension, and every extension has +a reference to its node. The methods extension and +node follow these references; a typical phrase is + +

self # node # attribute "xy"
+ +to get the value of an attribute from a method defined in the extension object; +or + +
self # node # iter
+  (fun n -> n # extension # my_method ...)
+ +to iterate over the subnodes and to call my_method of the +corresponding extension objects.

Note that extension objects do not have references to subnodes +(or "subextensions") themselves; in order to get one of the children of an +extension you must first go to the node object, then get the child node, and +finally reach the extension that is logically the child of the extension you +started with.

3.3.1. How to define an extension class

At minimum, you must define the methods +clone, node, and +set_node such that your class is compatible with the type +extension. The method set_node is called +during the initialization of the node, or after a node has been cloned; the +node object invokes set_node on the extension object to tell +it that this node is now the object the extension is linked to. The extension +must return the node object passed as argument of set_node +when the node method is called.

The clone method must return a copy of the +extension object; at least the object itself must be duplicated, but if +required, the copy should deeply duplicate all objects and values that are +referred to by the extension, too. Whether this is required depends on the +application; clone is invoked by the node object when one of +its cloning methods is called.

A good starting point for an extension class: + +

class custom_extension =
+  object (self)
+
+    val mutable node = (None : custom_extension node option)
+
+    method clone = {< >} 
+
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+
+    method set_node n =
+      node <- Some n
+
+  end
+ +This class is compatible with extension. The purpose of +defining such a class is, of course, adding further methods; and you can do it +without restriction.

Often, you want more than one extension class. In this case, +the simplest way is to give all your classes (for one kind of document) +the same type (with respect to the interface; i.e. it does not matter if your +classes differ in the defined private methods and instance variables, but +public methods count). This approach avoids lots of coercions and problems with +type incompatibilities. It is simple to implement: + +

class custom_extension =
+  object (self)
+    val mutable node = (None : custom_extension node option)
+
+    method clone = ...      (* see above *)
+    method node = ...       (* see above *)
+    method set_node n = ... (* see above *)
+
+    method virtual my_method1 : ...
+    method virtual my_method2 : ...
+    ... (* etc. *)
+  end
+
+class custom_extension_kind_A =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end
+
+class custom_extension_kind_B =
+  object (self)
+    inherit custom_extension
+
+    method my_method1 = ...
+    method my_method2 = ...
+  end
+ +If a class does not need a method (e.g. because it does not make sense, or it +would violate some important condition), it is possible to define the method +and to always raise an exception when the method is invoked +(e.g. assert false).

The latter is a strong recommendation: do not try to further +specialize the types of extension objects. It is difficult, sometimes even +impossible, and almost never worthwhile.

3.3.2. How to bind extension classes to element types

Once you have defined your extension classes, you can bind them +to element types. The simplest case is that you have only one class and that +this class is to be always used. The parsing functions in the module +Pxp_yacc take a spec argument which +can be customized. If your single class has the name c, +this argument should be + +

let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            []
+    ()
+ +This means that data nodes will be created from the exemplar passed by +~data_exemplar and that all element nodes will be made from the exemplar +specified by ~default_element_exemplar. In ~element_alist, you can +specify that different exemplars are to be used for different element types; but +this is an optional feature. If you do not need it, pass the empty list.

Remember that an exemplar is a (node, extension) pair that serves as pattern +when new nodes (and the corresponding extension objects) are added to the +document tree. In this case, the exemplar contains c as +extension, and when nodes are created, the exemplar is cloned, and cloning +also makes a copy of c such that all nodes of the document +tree will have a copy of c as extension.

The ~element_alist argument can bind +specific element types to specific exemplars; as exemplars may be instances of +different classes it is effectively possible to bind element types to +classes. For example, if the element type "p" is implemented by class "c_p", +and "q" is realized by "c_q", you can pass the following value: + +

let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl c)
+    ~default_element_exemplar: (new element_impl c)
+    ~element_alist:            
+      [ "p", new element_impl c_p;
+        "q", new element_impl c_q;
+      ]
+    ()
+ +The extension object c is still used for all data nodes and +for all other element types.


PrevHomeNext
The class type nodeUpDetails of the mapping from XML text to the tree representation
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html new file mode 100644 index 000000000..faea39fc6 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1496.html @@ -0,0 +1,442 @@ +Details of the mapping from XML text to the tree representation
The PXP user's guide
PrevChapter 3. The objects representing the documentNext

3.4. Details of the mapping from XML text to the tree representation

3.4.1. The representation of character-free elements

If an element declaration does not allow the element to +contain character data, the following rules apply.

If the element must be empty, i.e. it is declared with the +keyword EMPTY, the element instance must be effectively +empty (it must not even contain whitespace characters). The parser guarantees +that a declared EMPTY element never contains a data +node, not even one representing the empty string.

If the element declaration only permits other elements to occur +within that element but not character data, it is still possible to insert +whitespace characters between the subelements. The parser ignores these +characters, too, and does not create data nodes for them.

Example. Consider the following element types: + +

<!ELEMENT x ( #PCDATA | z )* >
+<!ELEMENT y ( z )* >
+<!ELEMENT z EMPTY>
+ +Only x may contain character data, the keyword +#PCDATA indicates this. The other types are character-free.

The XML term + +

<x><z/> <z/></x>
+ +will be internally represented by an element node for x +with three subnodes: the first z element, a data node +containing the space character, and the second z element. +In contrast to this, the term + +
<y><z/> <z/></y>
+ +is represented by an element node for y with only +two subnodes, the two z elements. There +is no data node for the space character because spaces are ignored in the +character-free element y.

3.4.2. The representation of character data

The XML specification allows all Unicode characters in XML +texts. This parser can be configured such that UTF-8 is used to represent the +characters internally; however, the default character encoding is +ISO-8859-1. (Currently, no other encodings are possible for the internal string +representation; the type Pxp_types.rep_encoding enumerates +the possible encodings. In principle, the parser could use any encoding that is +ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and +ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal +encodings (or other multibyte encodings which are not ASCII-compatible) unless +major parts of the parser are rewritten - unlikely...)

The internal encoding may be different from the external encoding (specified +in the XML declaration <?xml ... encoding="..."?>); in +this case the strings are automatically converted to the internal encoding.

If the internal encoding is ISO-8859-1, it is possible that there are +characters that cannot be represented. In this case, the parser ignores such +characters and prints a warning (to the collect_warning +object that must be passed when the parser is called).

The XML specification allows lines to be separated by single LF +characters, by CR LF character sequences, or by single CR +characters. Internally, these separators are always converted to single LF +characters.

The parser guarantees that there are never two adjacent data +nodes; if necessary, data material that would otherwise be represented by +several nodes is collapsed into one node. Note that you can still create node +trees with adjacent data nodes; however, the parser does not return such trees.

Note that CDATA sections are not represented specially; such +sections are added to the current data material that is being collected for the +next data node.

3.4.3. The representation of entities within documents

Entities are not represented within +documents! If the parser finds an entity reference in the document +content, the reference is immediately expanded, and the parser reads the +expansion text instead of the reference.

3.4.4. The representation of attributes

As attribute +values are composed of Unicode characters, too, the same problems with the +character encoding arise as for character material. Attribute values are +converted to the internal encoding, too; and if there are characters that +cannot be represented, these are dropped, and a warning is printed.

Attribute values are normalized before they are returned by +methods like attribute. First, any remaining entity +references are expanded; if necessary, expansion is performed recursively. +Second, newline characters (any of LF, CR LF, or CR characters) are converted +to single space characters. Note that especially the latter action is +prescribed by the XML standard (but the character reference &#10; is not converted +such that it is still possible to include line feeds into attributes).

3.4.5. The representation of processing instructions

Processing instructions are parsed to some extent: The first word of the +PI is called the target, and it is stored separated from the rest of the PI: + +

<?target rest?>
+ +The exact location where a PI occurs is not represented (by default). The +parser puts the PI into the object that represents the enclosing construct (an +element, a DTD, or the whole document); that means you can find out which PIs +occur in a certain element, in the DTD, or in the whole document, but you +cannot look up the exact position within the construct.

If you require the exact location of PIs, it is possible to +create extra nodes for them. This mode is controlled by the option +enable_pinstr_nodes. The additional nodes have the node type +T_pinstr target, and are created +from special exemplars contained in the spec (see +pxp_document.mli).

3.4.6. The representation of comments

Normally, comments are not represented; they are dropped by +default. However, if you require them, it is possible to create +T_comment nodes for them. This mode can be specified by the +option enable_comment_nodes. Comment nodes are created from +special exemplars contained in the spec (see +pxp_document.mli). You can access the contents of comments through the +method comment.

3.4.7. The attributes xml:lang and +xml:space

These attributes are not supported specially; they are handled +like any other attribute.

3.4.8. And what about namespaces?

Currently, there is no special support for namespaces. +However, the parser allows the colon to occur in names, so it is +possible to implement namespaces on top of the current API.

Some future release of PXP will support namespaces as built-in +feature...


PrevHomeNext
The class type extensionUpConfiguring and calling the parser
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html new file mode 100644 index 000000000..06b1e60ea --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1629.html @@ -0,0 +1,895 @@ +Resolvers and sources
The PXP user's guide
PrevChapter 4. Configuring and calling the parserNext

4.2. Resolvers and sources

4.2.1. Using the built-in resolvers (called sources)

The type source enumerates the two +possibilities where the document to parse comes from. + +

type source =
+    Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver)
+  | ExtID of (ext_id * Pxp_reader.resolver)
+ +You normally need not worry about this type as there are convenience +functions that create source values: + + +

  • from_file s: The document is read from +file s; you may specify absolute or relative path names. +The file name must be encoded as UTF-8 string.

    There is an optional argument ~system_encoding +specifying the character encoding which is used for the names of the file +system. For example, if this encoding is ISO-8859-1 and s is +also an ISO-8859-1 string, you can form the source: + +

    let s_utf8  =  recode_string ~in_enc:`Enc_iso88591 ~out_enc:`Enc_utf8 s in
    +from_file ~system_encoding:`Enc_iso88591 s_utf8

    This source has the advantage that +it is able to resolve inner external entities; i.e. if your document includes +data from another file (using the SYSTEM attribute), this +mode will find that file. However, this mode cannot resolve +PUBLIC identifiers nor SYSTEM identifiers +other than "file:".

  • from_channel ch: The document is read +from the channel ch. In general, this source also supports +file URLs found in the document; however, by default only absolute URLs are +understood. It is possible to associate an ID with the channel such that the +resolver knows how to interpret relative URLs: + +

    from_channel ~id:(System "file:///dir/dir1/") ch
    + +There is also the ~system_encoding argument specifying how file names are +encoded. - The example from above can also be written (but it is no +longer possible to interpret relative URLs because there is no ~id argument, +and computing this argument is relatively complicated because it must +be a valid URL): + +
    let ch = open_in s in
    +let src = from_channel ~system_encoding:`Enc_iso88591 ch in
    +...;
    +close_in ch

  • from_string s: The string +s is the document to parse. This mode is not able to +interpret file names of SYSTEM clauses, nor can it look up +PUBLIC identifiers.

    Normally, the encoding of the string is detected as usual +by analyzing the XML declaration, if any. However, it is also possible to +specify the encoding directly: + +

    let src = from_string ~fixenc:`ISO-8859-2 s

  • ExtID (id, r): The document to parse +is denoted by the identifier id (either a +SYSTEM or PUBLIC clause), and this +identifier is interpreted by the resolver r. Use this mode +if you have written your own resolver.

    Which character sets are possible depends on the passed +resolver r.

  • Entity (get_entity, r): The document +to parse is returned by the function invocation get_entity +dtd, where dtd is the DTD object to use (it may be +empty). Inner external references occurring in this entity are resolved using +the resolver r.

    Which character sets are possible depends on the passed +resolver r.

4.2.2. The resolver API

A resolver is an object that can be opened like a file, but you +do not pass the file name to the resolver, but the XML identifier of the entity +to read from (either a SYSTEM or PUBLIC +clause). When opened, the resolver must return the +Lexing.lexbuf that reads the characters. The resolver can +be closed, and it can be cloned. Furthermore, it is possible to tell the +resolver which character set it should assume. - The following is taken from Pxp_reader: + +

exception Not_competent
+exception Not_resolvable of exn
+
+class type resolver =
+  object
+    method init_rep_encoding : rep_encoding -> unit
+    method init_warner : collect_warnings -> unit
+    method rep_encoding : rep_encoding
+    method open_in : ext_id -> Lexing.lexbuf
+    method close_in : unit
+    method change_encoding : string -> unit
+    method clone : resolver
+    method close_all : unit
+  end
+ +The resolver object must work as follows:

  • When the parser is called, it tells the resolver the +warner object and the internal encoding by invoking +init_warner and init_rep_encoding. The +resolver should store these values. The method rep_encoding +should return the internal encoding.

  • If the parser wants to read from the resolver, it invokes +the method open_in. Either the resolver succeeds, in which +case the Lexing.lexbuf reading from the file or stream must +be returned, or opening fails. In the latter case the method implementation +should raise an exception (see below).

  • If the parser finishes reading, it calls the +close_in method.

  • If the parser finds a reference to another external +entity in the input stream, it calls clone to get a second +resolver which must be initially closed (not yet connected with an input +stream). The parser then invokes open_in and the other +methods as described.

  • If you already know the character set of the input +stream, you should recode it to the internal encoding, and define the method +change_encoding as an empty method.

  • If you want to support multiple external character sets, +the object must follow a much more complicated protocol. Directly after +open_in has been called, the resolver must return a lexical +buffer that only reads one byte at a time. This is only possible if you create +the lexical buffer with Lexing.from_function; the function +must then always return 1 if the EOF is not yet reached, and 0 if EOF is +reached. If the parser has read the first line of the document, it will invoke +change_encoding to tell the resolver which character set to +assume. From this moment, the object can return more than one byte at once. The +argument of change_encoding is either the parameter of the +"encoding" attribute of the XML declaration, or the empty string if there is +not any XML declaration or if the declaration does not contain an encoding +attribute.

    At the beginning the resolver must only return one +character every time something is read from the lexical buffer. The reason for +this is that you otherwise would not exactly know at which position in the +input stream the character set changes.

    If you want automatic recognition of the character set, +it is up to the resolver object to implement this.

  • If an error occurs, the parser calls the method +close_all for the top-level resolver; this method should +close itself (if not already done) and all clones.

Exceptions. It is possible to chain resolvers such that when the first resolver is not able +to open the entity, the other resolvers of the chain are tried in turn. The +method open_in should raise the exception +Not_competent to indicate that the next resolver should try +to open the entity. If the resolver is able to handle the ID, but some other +error occurs, the exception Not_resolvable should be raised +to force the chain to break. +

Example: How to define a resolver that is equivalent to +from_string: ...

4.2.3. Predefined resolver components

There are some classes in Pxp_reader that define common resolver behaviour. + +

class resolve_read_this_channel : 
+    ?id:ext_id -> 
+    ?fixenc:encoding -> 
+    ?auto_close:bool -> 
+    in_channel -> 
+        resolver
+ +Reads from the passed channel (it may be even a pipe). If the +~id argument is passed to the object, the created resolver +accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has +been cloned, it does not accept any ID. This means that this resolver cannot +handle inner references to external entities. Note that you can combine this +resolver with another resolver that can handle inner references (such as +resolve_as_file); see class 'combine' below. - If you pass the +~fixenc argument, the encoding of the channel is set to the +passed value, regardless of any auto-recognition or any XML declaration. - If +~auto_close = true (which is the default), the channel is +closed after use. If ~auto_close = false, the channel is +left open. +

class resolve_read_any_channel : 
+    ?auto_close:bool -> 
+    channel_of_id:(ext_id -> (in_channel * encoding option)) -> 
+        resolver
+ +This resolver calls the function ~channel_of_id to open a +new channel for the passed ext_id. This function must either +return the channel and the encoding, or it must fail with Not_competent. The +function must return None as encoding if the default +mechanism to recognize the encoding should be used. It must return +Some e if it is already known that the encoding of the +channel is e. If ~auto_close = true +(which is the default), the channel is closed after use. If +~auto_close = false, the channel is left open.

class resolve_read_url_channel :
+    ?base_url:Neturl.url ->
+    ?auto_close:bool -> 
+    url_of_id:(ext_id -> Neturl.url) -> 
+    channel_of_url:(Neturl.url -> (in_channel * encoding option)) -> 
+        resolver
+ +When this resolver gets an ID to read from, it calls the function +~url_of_id to get the corresponding URL. This URL may be a +relative URL; however, a URL scheme must be used which contains a path. The +resolver converts the URL to an absolute URL if necessary. The second +function, ~channel_of_url, is fed with the absolute URL as +input. This function opens the resource to read from, and returns the channel +and the encoding of the resource.

Both functions, ~url_of_id and +~channel_of_url, can raise Not_competent to indicate that +the object is not able to read from the specified resource. However, there is a +difference: A Not_competent from ~url_of_id is left as it +is, but a Not_competent from ~channel_of_url is converted to +Not_resolvable. So only ~url_of_id decides which URLs are +accepted by the resolver and which not.

The function ~channel_of_url must return +None as encoding if the default mechanism to recognize the +encoding should be used. It must return Some e if it is +already known that the encoding of the channel is e.

If ~auto_close = true (which is the default), the channel is +closed after use. If ~auto_close = false, the channel is +left open.

Objects of this class contain a base URL relative to which relative URLs are +interpreted. When creating a new object, you can specify the base URL by +passing it as ~base_url argument. When an existing object is +cloned, the base URL of the clone is the URL of the original object. - Note +that the term "base URL" has a strict definition in RFC 1808.

class resolve_read_this_string : 
+    ?id:ext_id -> 
+    ?fixenc:encoding -> 
+    string -> 
+        resolver
+ +Reads from the passed string. If the ~id argument is passed +to the object, the created resolver accepts only this ID. Otherwise all IDs are +accepted. - Once the resolver has been cloned, it does not accept any ID. This +means that this resolver cannot handle inner references to external +entities. Note that you can combine this resolver with another resolver that +can handle inner references (such as resolve_as_file); see class 'combine' +below. - If you pass the ~fixenc argument, the encoding of +the string is set to the passed value, regardless of any auto-recognition or +any XML declaration.

class resolve_read_any_string : 
+    string_of_id:(ext_id -> (string * encoding option)) -> 
+        resolver
+ +This resolver calls the function ~string_of_id to get the +string for the passed ext_id. This function must either +return the string and the encoding, or it must fail with Not_competent. The +function must return None as encoding if the default +mechanism to recognize the encoding should be used. It must return +Some e if it is already known that the encoding of the +string is e.

class resolve_as_file :
+    ?file_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+    ?host_prefix:[ `Not_recognized | `Allowed | `Required ] ->
+    ?system_encoding:encoding ->
+    ?url_of_id:(ext_id -> Neturl.url) -> 
+    ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) ->
+    unit -> 
+        resolver
+Reads from the local file system. Every file name is interpreted as +file name of the local file system, and the referred file is read.

The full form of a file URL is: file://host/path, where +'host' specifies the host system where the file identified by 'path' +resides. host = "" or host = "localhost" are accepted; other values +will raise Not_competent. The standard for file URLs is +defined in RFC 1738.

Option ~file_prefix: Specifies how the "file:" prefix of +file names is handled: +

  • `Not_recognized: The prefix is not +recognized.

  • `Allowed: The prefix is allowed but +not required (the default).

  • `Required: The prefix is +required.

Option ~host_prefix: Specifies how the "//host" phrase of +file names is handled: +

  • `Not_recognized: The prefix is not +recognized.

  • `Allowed: The prefix is allowed but +not required (the default).

  • `Required: The prefix is +required.

Option ~system_encoding: Specifies the encoding of file +names of the local file system. Default: UTF-8.

Options ~url_of_id, ~channel_of_url: Not +for the casual user!

class combine : 
+    ?prefer:resolver -> 
+    resolver list -> 
+        resolver
+ +Combines several resolver objects. If a concrete entity with an +ext_id is to be opened, the combined resolver tries the +contained resolvers in turn until a resolver accepts opening the entity +(i.e. it does not raise Not_competent on open_in).

Clones: If the 'clone' method is invoked before 'open_in', all contained +resolvers are cloned separately and again combined. If the 'clone' method is +invoked after 'open_in' (i.e. while the resolver is open), additionally the +clone of the active resolver is flagged as being preferred, i.e. it is tried +first.


PrevHomeNext
Configuring and calling the parserUpThe DTD classes
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html new file mode 100644 index 000000000..34f09c208 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1812.html @@ -0,0 +1,517 @@ +The DTD classes
The PXP user's guide
PrevChapter 4. Configuring and calling the parserNext

4.3. The DTD classes

Sorry, not yet +written. Perhaps the interface definition of Pxp_dtd expresses the same:


(**********************************************************************)
+(*                                                                    *)
+(* Pxp_dtd:                                                           *)
+(*     Object model of document type declarations                     *)
+(*                                                                    *)
+(**********************************************************************)
+
+(* ======================================================================
+ * OVERVIEW
+ *
+ * class dtd ............... represents the whole DTD, including element
+ *                           declarations, entity declarations, notation
+ *                           declarations, and processing instructions
+ * class dtd_element ....... represents an element declaration consisting
+ *                           of a content model and an attribute list
+ *                           declaration
+ * class dtd_notation ...... represents a notation declaration
+ * class proc_instruction .. represents a processing instruction
+ * ======================================================================
+ *
+ *)
+
+
+class dtd :
+  (* Creation:
+   *   new dtd
+   * creates a new, empty DTD object without any declaration, without a root
+   * element, without an ID.
+   *)
+  Pxp_types.collect_warnings -> 
+  Pxp_types.rep_encoding ->
+  object
+    method root : string option
+      (* get the name of the root element if present *)
+
+    method set_root : string -> unit
+      (* set the name of the root element. This method can be invoked 
+       * only once
+       *)
+
+    method id : Pxp_types.dtd_id option
+      (* get the identifier for this DTD *)
+
+    method set_id : Pxp_types.dtd_id -> unit
+      (* set the identifier. This method can be invoked only once *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* returns the encoding used for character representation *)
+
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - elements and notations that have not been added may be used in an
+       *   arbitrary way; the methods "element" and "notation" indicate this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary contents are allowed or not. *)
+
+    method standalone_declaration : bool
+      (* Whether there is a 'standalone' declaration or not. Strictly 
+       * speaking, this declaration is not part of the DTD, but it is
+       * included here because of practical reasons. 
+       * If not set, this property defaults to 'false'.
+       *)
+
+    method set_standalone_declaration : bool -> unit
+      (* Sets the 'standalone' declaration. *)
+
+
+    method add_element : dtd_element -> unit
+      (* add the given element declaration to this DTD. Raises Not_found
+       * if there is already an element declaration with the same name.
+       *)
+
+    method add_gen_entity : Pxp_entity.entity -> bool -> unit
+      (* add_gen_entity e extdecl:
+       * add the entity 'e' as general entity to this DTD (general entities
+       * are those represented by &name;). If there is already a declaration
+       * with the same name, the second definition is ignored; as exception from
+       * this rule, entities with names "lt", "gt", "amp", "quot", and "apos"
+       * may only be redeclared with a definition that is equivalent to the
+       * standard definition; otherwise a Validation_error is raised.
+       *
+       * 'extdecl': 'true' indicates that the entity declaration occurs in
+       * an external entity. (Used for the standalone check.)
+       *)
+
+    method add_par_entity : Pxp_entity.entity -> unit
+      (* add the given entity as parameter entity to this DTD (parameter
+       * entities are those represented by %name;). If there is already a 
+       * declaration with the same name, the second definition is ignored.
+       *)
+
+    method add_notation : dtd_notation -> unit
+      (* add the given notation to this DTD. If there is already a declaration
+       * with the same name, a Validation_error is raised.
+       *)
+
+    method add_pinstr : proc_instruction -> unit
+      (* add the given processing instruction to this DTD. *)
+
+    method element : string -> dtd_element
+      (* looks up the element declaration with the given name. Raises 
+       * Validation_error if the element cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method element_names : string list
+      (* returns the list of the names of all element declarations. *)
+
+    method gen_entity : string -> (Pxp_entity.entity * bool)
+      (* let e, extdecl = obj # gen_entity n:
+       * looks up the general entity 'e' with the name 'n'. Raises
+       * WF_error if the entity cannot be found.
+       * 'extdecl': indicates whether the entity declaration occurred in an 
+       * external entity.
+       *)
+
+    method gen_entity_names : string list
+      (* returns the list of all general entity names *)
+
+    method par_entity : string -> Pxp_entity.entity
+      (* looks up the parameter entity with the given name. Raises
+       * WF_error if the entity cannot be found.
+       *)
+
+    method par_entity_names : string list
+      (* returns the list of all parameter entity names *)
+
+    method notation : string -> dtd_notation
+      (* looks up the notation declaration with the given name. Raises
+       * Validation_error if the notation cannot be found. (If "allow_arbitrary"
+       * has been invoked before, Undeclared is raised instead.)
+       *)
+
+    method notation_names : string list
+      (* Returns the list of the names of all added notations *)
+
+    method pinstr : string -> proc_instruction list
+      (* looks up all processing instructions with the given target.
+       * The "target" is the identifier following "<?".
+       * Note: It is not possible to find out the exact position of the
+       * processing instruction.
+       *)
+
+    method pinstr_names : string list
+      (* Returns the list of the names (targets) of all added pinstrs *)
+
+    method validate : unit
+      (* ensures that the DTD is valid. This method is optimized such that
+       * actual validation is only performed if DTD has changed.
+       * If the DTD is invalid, mostly a Validation_error is raised,
+       * but other exceptions are possible, too.
+       *)
+
+    method only_deterministic_models : unit
+      (* Succeeds if all regexp content models are deterministic. 
+       * Otherwise Validation_error.
+       *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> bool -> unit
+      (* write os enc doctype:
+       * Writes the DTD as 'enc'-encoded string to 'os'. If 'doctype', a 
+       * DTD like <!DOCTYPE root [ ... ]> is written. If 'not doctype',
+       * only the declarations are written (the material within the
+       * square brackets).
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> bool -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+
+    (*----------------------------------------*)
+    method invalidate : unit
+      (* INTERNAL METHOD *)
+    method warner : Pxp_types.collect_warnings
+      (* INTERNAL METHOD *)
+  end
+
+
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_element : dtd -> string -> 
+  (* Creation:
+   *   new dtd_element init_dtd init_name:
+   * creates a new dtd_element object for init_dtd with init_name.
+   * The strings are represented in the same encoding as init_dtd.
+   *)
+  object
+
+    method name : string
+      (* returns the name of the declared element *)
+
+    method externally_declared : bool
+      (* returns whether the element declaration occurs in an external
+       * entity.
+       *)
+
+    method content_model : Pxp_types.content_model_type
+      (* get the content model of this element declaration, or Unspecified *)
+
+    method content_dfa : Pxp_dfa.dfa_definition option
+      (* return the DFA of the content model if there is a DFA, or None.
+       * A DFA exists only for regexp style content models which are
+       * deterministic.
+       *)
+
+    method set_cm_and_extdecl : Pxp_types.content_model_type -> bool -> unit
+      (* set_cm_and_extdecl cm extdecl:
+       * set the content model to 'cm'. Once the content model is not 
+       * Unspecified, it cannot be set to a different value again.
+       * Furthermore, it is set whether the element occurs in an external
+       * entity ('extdecl').
+       *)
+
+    method encoding : Pxp_types.rep_encoding
+      (* Return the encoding of the strings *)
+
+    method allow_arbitrary : unit
+      (* After this method has been invoked, the object changes its behaviour:
+       * - attributes that have not been added may be used in an
+       *   arbitrary way; the method "attribute" indicates this
+       *   by raising Undeclared instead of Validation_error.
+       *)
+
+    method disallow_arbitrary : unit
+
+    method arbitrary_allowed : bool
+      (* Returns whether arbitrary attributes are allowed or not. *)
+
+    method attribute : string -> 
+                         Pxp_types.att_type * Pxp_types.att_default
+      (* get the type and default value of a declared attribute, or raise
+       * Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_violates_standalone_declaration : 
+               string -> string option -> bool
+      (* attribute_violates_standalone_declaration name v:
+       * Checks whether the attribute 'name' violates the "standalone"
+       * declaration if it has value 'v'.
+       * The method returns true if:
+       * - The attribute declaration occurs in an external entity, 
+       * and if one of the two conditions holds:
+       * - v = None, and there is a default for the attribute value
+       * - v = Some s, and the type of the attribute is not CDATA,
+       *   and s changes if normalized according to the rules of the
+       *   attribute type.
+       *
+       * The method raises Validation_error if the attribute does not exist.
+       * If 'arbitrary_allowed', the exception Undeclared is raised instead
+       * of Validation_error.
+       *)
+
+    method attribute_names : string list
+      (* get the list of all declared attributes *)
+
+    method names_of_required_attributes : string list
+      (* get the list of all attributes that are specified as required 
+       * attributes
+       *)
+
+    method id_attribute_name : string option
+      (* Returns the name of the attribute with type ID, or None. *)
+
+    method idref_attribute_names : string list
+      (* Returns the names of the attributes with type IDREF or IDREFS. *)
+
+    method add_attribute : string -> 
+                           Pxp_types.att_type -> 
+			   Pxp_types.att_default -> 
+			   bool ->
+			     unit
+      (* add_attribute name type default extdecl:
+       * add an attribute declaration for an attribute with the given name,
+       * type, and default value. If there is more than one declaration for
+       * an attribute name, the first declaration counts; the other declarations
+       * are ignored.
+       * 'extdecl': if true, the attribute declaration occurs in an external
+       * entity. This property is used to check the "standalone" attribute.
+       *)
+
+    method validate : unit
+      (* checks whether this element declaration (i.e. the content model and
+       * all attribute declarations) is valid for the associated DTD.
+       * Raises mostly Validation_error if the validation fails.
+       *)
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <!ELEMENT ... > declaration to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and dtd_notation : string -> Pxp_types.ext_id -> Pxp_types.rep_encoding ->
+  (* Creation:
+   *    new dtd_notation a_name an_external_ID init_encoding
+   * creates a new dtd_notation object with the given name and the given
+   * external ID.
+   *)
+  object
+    method name : string
+    method ext_id : Pxp_types.ext_id
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <!NOTATION ... > declaration to 'os' as 'enc'-encoded 
+       * string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+  end
+
+(* ---------------------------------------------------------------------- *)
+
+and proc_instruction : string -> string -> Pxp_types.rep_encoding ->
+  (* Creation:
+   *   new proc_instruction a_target a_value init_encoding
+   * creates a new proc_instruction object with the given target string and
+   * the given value string. 
+   * Note: A processing instruction is written as <?target value?>. 
+   *)
+  object
+    method target : string
+    method value : string
+    method encoding : Pxp_types.rep_encoding
+
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+      (* write os enc:
+       * Writes the <?...?> PI to 'os' as 'enc'-encoded string.
+       *)
+
+    method write_compact_as_latin1 : Pxp_types.output_stream -> unit
+      (* DEPRECATED METHOD; included only to keep compatibility with
+       * older versions of the parser
+       *)
+
+    method parse_pxp_option : (string * string * (string * string) list)
+      (* Parses a PI containing a PXP option. Such PIs are formed like:
+       *   <?target option-name option-att="value" option-att="value" ... ?>
+       * The method returns a triple
+       *   (target, option-name, [option-att, value; ...])
+       * or raises Error.
+       *)
+
+  end
+
+;;


PrevHomeNext
Resolvers and sourcesUpInvoking the parser
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html new file mode 100644 index 000000000..b289a3674 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1818.html @@ -0,0 +1,779 @@ +Invoking the parser
The PXP user's guide
PrevChapter 4. Configuring and calling the parserNext

4.4. Invoking the parser

Here is a description of Pxp_yacc.

4.4.1. Defaults

The following defaults are available: + +

val default_config : config
+val default_extension : ('a node extension) as 'a
+val default_spec : ('a node extension as 'a) spec

4.4.2. Parsing functions

In the following, the term "closed document" refers to +an XML structure like + +

<!DOCTYPE ... [ declarations ] >
+<root>
+...
+</root>
+ +The term "fragment" refers to an XML structure like + +
<root>
+...
+</root>
+ +i.e. only to one isolated element instance.

val parse_dtd_entity : config -> source -> dtd
+ +Parses the declarations which are contained in the entity, and returns them as +dtd object.

val extract_dtd_from_document_entity : config -> source -> dtd
+ +Extracts the DTD from a closed document. Both the internal and the external +subsets are extracted and combined to one dtd object. This +function does not parse the whole document, but only the parts that are +necessary to extract the DTD.

val parse_document_entity : 
+    ?transform_dtd:(dtd -> dtd) ->
+    ?id_index:('ext index) ->
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext document
+ +Parses a closed document and validates it against the DTD that is contained in +the document (internal and external subsets). The option +~transform_dtd can be used to transform the DTD in the +document, and to use the transformed DTD for validation. If +~id_index is specified, an index of all ID attributes is +created.

val parse_wfdocument_entity : 
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext document
+ +Parses a closed document, but checks it only on well-formedness.

val parse_content_entity  : 
+    ?id_index:('ext index) ->
+    config ->  
+    source -> 
+    dtd -> 
+    'ext spec -> 
+        'ext node
+ +Parses a fragment, and validates the element.

val parse_wfcontent_entity : 
+    config -> 
+    source -> 
+    'ext spec -> 
+        'ext node
+ +Parses a fragment, but checks it only on well-formedness.

4.4.3. Configuration options

type config =
+    { warner : collect_warnings;
+      errors_with_line_numbers : bool;
+      enable_pinstr_nodes : bool;
+      enable_super_root_node : bool;
+      enable_comment_nodes : bool;
+      encoding : rep_encoding;
+      recognize_standalone_declaration : bool;
+      store_element_positions : bool;
+      idref_pass : bool;
+      validate_by_dfa : bool;
+      accept_only_deterministic_models : bool;
+      ...
+    }
+ +

  • warner:The parser prints +warnings by invoking the method warn for this warner +object. (Default: all warnings are dropped)

  • errors_with_line_numbers:If +true, errors contain line numbers; if false, errors contain only byte +positions. The latter mode is faster. (Default: true)

  • enable_pinstr_nodes:If true, +the parser creates extra nodes for processing instructions. If false, +processing instructions are simply added to the element or document surrounding +the instructions. (Default: false)

  • enable_super_root_node:If +true, the parser creates an extra node which is the parent of the root of the +document tree. This node is called super root; it is an element with type +T_super_root. - If there are processing instructions outside +the root element and outside the DTD, they are added to the super root instead +of the document. - If false, the super root node is not created. (Default: +false)

  • enable_comment_nodes:If true, +the parser creates nodes for comments with type T_comment; +if false, such nodes are not created. (Default: false)

  • encoding:Specifies the +internal encoding of the parser. Most strings are then represented according to +this encoding; however there are some exceptions (especially +ext_id values which are always UTF-8 encoded). +(Default: `Enc_iso88591)

  • recognize_standalone_declaration: If true and if the parser is +validating, the standalone="yes" declaration forces that it +is checked whether the document is a standalone document. - If false, or if the +parser is in well-formedness mode, such declarations are ignored. +(Default: true)

  • store_element_positions: If +true, for every non-data node the source position is stored. If false, the +position information is lost. If available, you can get the positions of nodes +by invoking the position method. +(Default: true)

  • idref_pass:If true and if +there is an ID index, the parser checks whether every IDREF or IDREFS attribute +refers to an existing node; this requires that the parser traverses the whole +document tree. If false, this check is left out. (Default: false)

  • validate_by_dfa:If true and if +the content model for an element type is deterministic, a deterministic finite +automaton is used to validate whether the element contents match the content +model of the type. If false, or if a DFA is not available, a backtracking +algorithm is used for validation. (Default: true)

  • accept_only_deterministic_models: If true, only deterministic content +models are accepted; if false, any syntactically correct content models can be +processed. (Default: true)

4.4.4. Which configuration should I use?

First, I recommend varying the default configuration instead of +creating a new configuration record. For instance, to set +idref_pass to true, change the default +as in: +

let config = { default_config with idref_pass = true }
+The background is that I can add more options to the record in future versions +of the parser without breaking your programs.

Do I need extra nodes for processing instructions? By default, such nodes are not created. This does not mean that the +processing instructions are lost; however, you cannot find out the exact +location where they occur. For example, the following XML text + +

<x><?pi1?><y/><?pi2?></x> 
+ +will normally create one element node for x containing +one subnode for y. The processing +instructions are attached to x in a separate hash table; you +can access them using x # pinstr "pi1" and x # +pinstr "pi2", respectively. The information about where the +instructions occur within x is lost.

If the option enable_pinstr_nodes is +turned on, the parser creates extra nodes pi1 and +pi2 such that the subnodes of x are now: + +

x # sub_nodes = [ pi1; y; pi2 ]
+ +The extra nodes contain the processing instructions in the usual way, i.e. you +can access them using pi1 # pinstr "pi1" and pi2 # +pinstr "pi2", respectively.

Note that you will need an exemplar for the PI nodes (see +make_spec_from_alist).

Do I need a super root node? By default, there is no super root node. The +document object refers directly to the node representing the +root element of the document, i.e. + +

doc # root = r
+ +if r is the root node. This is sometimes inconvenient: (1) +Some algorithms become simpler if every node has a parent, even the root +node. (2) Some standards such as XPath call the "root node" the node whose +child represents the root of the document. (3) The super root node can serve +as a container for processing instructions outside the root element. Because of +these reasons, it is possible to create an extra super root node, whose child +is the root node: + +
doc # root = sr         &&
+sr # sub_nodes = [ r ]
+ +When extra nodes are also created for processing instructions, these nodes can +be added to the super root node if they occur outside the root element (reason +(3)), and the order reflects the order in the source text.

Note that you will need an exemplar for the super root node +(see make_spec_from_alist).

What is the effect of the UTF-8 encoding? By default, the parser represents strings (with few +exceptions) as ISO-8859-1 strings. These are well-known, and there are tools +and fonts for this encoding.

However, internationalization may require that you switch over +to UTF-8 encoding. In most environments, the immediate effect will be that you +cannot read strings with character codes >= 160 any longer; your terminal will +only show funny glyph combinations. It is strongly recommended to install +Unicode fonts (GNU Unifont, +Markus Kuhn's fonts) and terminal emulators +that can handle UTF-8 byte sequences. Furthermore, a Unicode editor may +be helpful (such as Yudit). There is +also a FAQ by +Markus Kuhn.

By setting encoding to +`Enc_utf8 all strings originating from the parsed XML +document are represented as UTF-8 strings. This includes not only character +data and attribute values but also element names, attribute names and so on, as +it is possible to use any Unicode letter to form such names. Strictly +speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it +will have difficulties when validating documents containing +non-ISO-8859-1-names.

This mode does not have any impact on the external +representation of documents. The character set assumed when reading a document +is set in the XML declaration, and character set when writing a document must +be passed to the write method.

How do I check that nodes exist which are referred by IDREF attributes? First, you must create an index of all occurring ID +attributes: + +

let index = new hash_index
+ +This index must be passed to the parsing function: + +
parse_document_entity
+  ~id_index:(index :> index)
+  config source spec
+ +Next, you must turn on the idref_pass mode: + +
let config = { default_config with idref_pass = true }
+ +Note that now the whole document tree will be traversed, and every node will be +checked for IDREF and IDREFS attributes. If the tree is big, this may take some +time.

What are deterministic content models? These types of models can speed up the validation checks; +furthermore they ensure SGML-compatibility. In particular, a content model is +deterministic if the parser can determine the actually used alternative by +inspecting only the current token. For example, this element has +non-deterministic contents: + +

<!ELEMENT x ((u,v) | (u,y+) | v)>
+ +If the first element in x is u, the +parser does not know which of the alternatives (u,v) or +(u,y+) will work; the parser must also inspect the second +element to be able to distinguish between the alternatives. Because such +look-ahead (or "guessing") is required, this example is +non-deterministic.

The XML standard demands that content models must be +deterministic. So it is recommended to turn the option +accept_only_deterministic_models on; however, PXP can also +process non-deterministic models using a backtracking algorithm.

Deterministic models ensure that validation can be performed in +linear time. In order to get the maximum benefits, PXP also implements a +special validator that profits from deterministic models; this is the +deterministic finite automaton (DFA). This validator is enabled per element +type if the element type has a deterministic model and if the option +validate_by_dfa is turned on.

In general, I expect that the DFA method is faster than the +backtracking method; especially in the worst case the DFA takes only linear +time. However, if the content model has only few alternatives and the +alternatives do not nest, the backtracking algorithm may be better.


PrevHomeNext
The DTD classesUpUpdates
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html new file mode 100644 index 000000000..8fc856264 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x1965.html @@ -0,0 +1,152 @@ +Updates
The PXP user's guide
PrevChapter 4. Configuring and calling the parser 

4.5. Updates

Some (often later added) features that are otherwise +not explained in the manual but are worth mentioning.

  • Methods node_position, node_path, nth_node, +previous_node, next_node for nodes: See pxp_document.mli

  • Functions to determine the document order of nodes: +compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli


PrevHome 
Invoking the parserUp 
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html new file mode 100644 index 000000000..dc9cc1e8c --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x468.html @@ -0,0 +1,474 @@ +A complete example: The readme DTD
The PXP user's guide
PrevChapter 1. What is XML?Next

1.3. A complete example: The readme DTD

The reason for readme was that I often wrote two versions +of files such as README and INSTALL which explain aspects of a distributed +software archive; one version was ASCII-formatted, the other was written in +HTML. Maintaining both versions means double amount of work, and changes +of one version may be forgotten in the other version. To improve this situation +I invented the readme DTD which allows me to maintain only +one source written as XML document, and to generate the ASCII and the HTML +version from it.

In this section, I explain only the DTD. The readme DTD is +contained in the PXP distribution together with the two converters to +produce ASCII and HTML. Another section of this manual describes the HTML +converter.

The documents have a simple structure: There are up to three levels of nested +sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The +outermost element has usually the type readme, it is +declared by + +

<!ELEMENT readme (sect1+)>
+<!ATTLIST readme
+          title CDATA #REQUIRED>
+ +This means that this element contains one or more sections of the first level +(element type sect1), and that the element has a required +attribute title containing character data (CDATA). Note that +readme elements must not contain text data.

The three levels of sections are declared as follows: + +

<!ELEMENT sect1 (title,(sect2|p|ul)+)>
+
+<!ELEMENT sect2 (title,(sect3|p|ul)+)>
+
+<!ELEMENT sect3 (title,(p|ul)+)>
+ +Every section has a title element as first subelement. After +the title an arbitrary but non-empty sequence of inner sections, paragraphs and +item lists follows. Note that the inner sections must belong to the next higher +section level; sect3 elements must not contain inner +sections because there is no next higher level.

Obviously, all three declarations allow paragraphs (p) and +item lists (ul). The definition can be simplified at this +point by using a parameter entity: + +

<!ENTITY % p.like "p|ul">
+
+<!ELEMENT sect1 (title,(sect2|%p.like;)+)>
+
+<!ELEMENT sect2 (title,(sect3|%p.like;)+)>
+
+<!ELEMENT sect3 (title,(%p.like;)+)>
+ +Here, the entity p.like is nothing but a macro abbreviating +the same sequence of declarations; if new elements on the same level as +p and ul are later added, it is +sufficient only to change the entity definition. Note that there are some +restrictions on the usage of entities in this context; most importantly, entities +containing a left parenthesis must also contain the corresponding right +parenthesis.

Note that the entity p.like is a +parameter entity, i.e. the ENTITY declaration contains a +percent sign, and the entity is referred to by +%p.like;. This kind of entity must be used to abbreviate +parts of the DTD; the general entities declared without +percent sign and referred to as &name; are not allowed +in this context.

The title element specifies the title of the section in +which it occurs. The title is given as character data, optionally interspersed +with line breaks (br): + +

<!ELEMENT title (#PCDATA|br)*>
+ +Compared with the title attribute of +the readme element, this element allows inner markup +(i.e. br) while attribute values do not: It is an error if +an attribute value contains the left angle bracket < literally such that it +is impossible to include inner elements.

The paragraph element p has a structure similar to +title, but it allows more inner elements: + +

<!ENTITY % text "br|code|em|footnote|a">
+
+<!ELEMENT p (#PCDATA|%text;)*>
+ +Line breaks do not have inner structure, so they are declared as being empty: + +
<!ELEMENT br EMPTY>
+ +This means that really nothing is allowed within br; you +must always write <br></br> or abbreviated +<br/>.

Code samples should be marked up by the code tag; emphasized +text can be indicated by em: + +

<!ELEMENT code (#PCDATA)>
+
+<!ELEMENT em (#PCDATA|%text;)*>
+ +That code elements are not allowed to contain further markup +while em elements do is a design decision by the author of +the DTD.

Unordered lists simply consist of one or more list items, and a list item may +contain paragraph-level material: + +

<!ELEMENT ul (li+)>
+
+<!ELEMENT li (%p.like;)*>
+ +Footnotes are described by the text of the note; this text may contain +text-level markup. There is no mechanism to describe the numbering scheme of +footnotes, or to specify how footnote references are printed. + +
<!ELEMENT footnote (#PCDATA|%text;)*>
+ +Hyperlinks are written as in HTML. The anchor tag contains the text describing +where the link points to, and the href attribute is the +pointer (as URL). There is no way to describe locations of "hash marks". If the +link refers to another readme document, the attribute +readmeref should be used instead of href. +The reason is that the converted document has usually a different system +identifier (file name), and the link to a converted document must be +converted, too. + +
<!ELEMENT a (#PCDATA)*>
+<!ATTLIST a 
+          href      CDATA #IMPLIED
+          readmeref CDATA #IMPLIED
+>
+ +Note that although it is only sensible to specify one of the two attributes, +the DTD has no means to express this restriction.

So far the DTD. Finally, here is a document for it: + +

<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE readme SYSTEM "readme.dtd">
+<readme title="How to use the readme converters">
+<sect1>
+  <title>Usage</title>
+  <p>
+    The <em>readme</em> converter is invoked on the command line by:
+  </p>
+  <p>
+    <code>readme [ -text | -html ] input.xml</code>
+  </p>
+  <p>
+    Here a list of options:
+  </p>
+  <ul>
+    <li>
+      <p><code>-text</code>: specifies that ASCII output should be produced</p>
+    </li>
+    <li>
+      <p><code>-html</code>: specifies that HTML output should be produced</p>
+    </li>
+  </ul>
+  <p>
+    The input file must be given on the command line. The converted output is
+    printed to <em>stdout</em>.
+  </p>
+</sect1>
+<sect1>
+  <title>Author</title>
+  <p>
+    The program has been written by
+    <a href="mailto:Gerd.Stolpmann@darmstadt.netsurf.de">Gerd Stolpmann</a>.
+  </p>
+</sect1>
+</readme>


PrevHomeNext
Highlights of XMLUpUsing PXP
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html new file mode 100644 index 000000000..f2dcdd79b --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x550.html @@ -0,0 +1,765 @@ +How to parse a document from an application
The PXP user's guide
PrevChapter 2. Using PXPNext

2.2. How to parse a document from an application

Let me first give a rough overview of the object model of the parser. The +following items are represented by objects: + +

  • Documents: The document representation is more or less the +anchor for the application; all accesses to the parsed entities start here. It +is described by the class document contained in the module +Pxp_document. You can get some global information, such +as the XML declaration the document begins with, the DTD of the document, +global processing instructions, and most important, the document tree.

  • The contents of documents: The contents have the structure +of a tree: Elements contain other elements and text[1]. + +The common type to represent both kinds of content is node +which is a class type that unifies the properties of elements and character +data. Every node has a list of children (which is empty if the element is empty +or the node represents text); nodes may have attributes; nodes have always text +contents. There are two implementations of node, the class +element_impl for elements, and the class +data_impl for text data. You find these classes and class +types in the module Pxp_document, too.

    Note that attribute lists are represented by non-class values.

  • The node extension: For advanced usage, every node of the +document may have an associated extension which is simply +a second object. This object must have the three methods +clone, node, and +set_node as bare minimum, but you are free to add methods as +you want. This is the preferred way to add functionality to the document +tree[2]. The class type extension is +defined in Pxp_document, too.

  • The DTD: Sometimes it is necessary to access the DTD of a +document; the average application does not need this feature. The class +dtd describes DTDs, and makes it possible to get +representations of element, entity, and notation declarations as well as +processing instructions contained in the DTD. This class, and +dtd_element, dtd_notation, and +proc_instruction can be found in the module +Pxp_dtd. There are a couple of classes representing +different kinds of entities; these can be found in the module +Pxp_entity.

+ +Additionally, the following modules play a role: + +

  • Pxp_yacc: Here the main parsing functions such as +parse_document_entity are located. Some additional types and +functions allow the parser to be configured in a non-standard way.

  • Pxp_types: This is a collection of basic types and +exceptions.

+ +There are some further modules that are needed internally but are not part of +the API.

Let the document to be parsed be stored in a file called +doc.xml. The parsing process is started by calling the +function + +

val parse_document_entity : config -> source -> 'ext spec -> 'ext document
+ +defined in the module Pxp_yacc. The first argument +specifies some global properties of the parser; it is recommended to start with +the default_config. The second argument determines where the +document to be parsed comes from; this may be a file, a channel, or an entity +ID. To parse doc.xml, it is sufficient to pass +from_file "doc.xml".

The third argument passes the object specification to use. Roughly +speaking, it determines which classes implement the node objects of which +element types, and which extensions are to be used. The 'ext +polymorphic variable is the type of the extension. For the moment, let us +simply pass default_spec as this argument, and ignore it.

So the following expression parses doc.xml: + +

open Pxp_yacc
+let d = parse_document_entity default_config (from_file "doc.xml") default_spec
+ +Note that default_config implies that warnings are collected +but not printed. Errors raise one of the exceptions defined in +Pxp_types; to get readable errors and warnings catch the +exceptions as follows: + +
(* Warning handler meant to be installed as the [warner] field of the parser
+   configuration (see the [try] expression that follows): every warning
+   message passed to [warn] is printed on stdout with a "WARNING: " prefix. *)
+class warner =
+  object 
+    method warn w =
+      print_endline ("WARNING: " ^ w)
+  end
+;;
+
+try
+  let config = { default_config with warner = new warner } in
+  let d = parse_document_entity config (from_file "doc.xml") default_spec
+  in
+    ...
+with
+   e ->
+     print_endline (Pxp_types.string_of_exn e)
+ +Now d is an object of the document +class. If you want the node tree, you can get the root element by + +
let root = d # root
+ +and if you would rather like to access the DTD, determine it by + +
let dtd = d # dtd
+ +As it is more interesting, let us investigate the node tree now. Given the root +element, it is possible to recursively traverse the whole tree. The children of +a node n are returned by the method +sub_nodes, and the type of a node is returned by +node_type. This function traverses the tree, and prints the +type of each node: + +
(* Walk the node tree rooted at [n] and print one line per node that tells
+   whether it is an element (with its type name) or a data node. *)
+let rec print_structure n =
+  match n # node_type with
+  | T_data ->
+      print_endline "Data"
+  | T_element element_name ->
+      print_endline ("Element of type " ^ element_name);
+      List.iter print_structure (n # sub_nodes)
+  | _ ->
+      (* Other node types are not possible unless the parser is configured
+         differently.
+       *)
+      assert false
+ +You can call this function by + +
print_structure root
+ +The type returned by node_type is either T_element +name or T_data. The name of the +element type is the string included in the angle brackets. Note that only +elements have children; data nodes are always leaves of the tree.

There are some more methods in order to access a parsed node tree: + +

  • n # parent: Returns the parent node, or raises +Not_found if the node is already the root.

  • n # root: Returns the root of the node tree.

  • n # attribute a: Returns the value of the attribute with +name a. The method returns a value for every +declared attribute, independently of whether the attribute +instance is defined or not. If the attribute is not declared, +Not_found will be raised. (In well-formedness mode, every +attribute is considered as being implicitly declared with type +CDATA.)

    The following return values are possible: Value s, +Valuelist sl , and Implied_value. +The first two value types indicate that the attribute value is available, +either because there is a definition +a="value" +in the XML text, or because there is a default value (declared in the +DTD). Only if both the instance definition and the default declaration are +missing, the latter value Implied_value will be returned.

    In the DTD, every attribute is typed. There are single-value types (CDATA, ID, +IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes +Value s back, where s is the normalized +string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS) +represent list values, and the parser splits the XML literal into several +tokens and returns these tokens as Valuelist sl.

    Normalization means that entity references (the +&name; tokens) and +character references +(&#number;) are replaced +by the text they represent, and that white space characters are converted into +plain spaces.

  • n # data: Returns the character data contained in the +node. For data nodes, the meaning is obvious as this is the main content of +data nodes. For element nodes, this method returns the concatenated contents of +all inner data nodes.

    Note that entity references included in the text are resolved while they are +being parsed; for example the text "a &lt;&gt; b" will be returned +as "a <> b" by this method. Spaces of data nodes are always +preserved. Newlines are preserved, but always converted to \n characters even +if newlines are encoded as \r\n or \r. Normally you will never see two adjacent +data nodes because the parser collapses all data material at one location into +one node. (However, if you create your own tree or transform the parsed tree, +it is possible to have adjacent data nodes.)

    Note that elements that do not allow #PCDATA as content +will not have data nodes as children. This means that spaces and newlines, the +only character material allowed for such elements, are silently dropped.

+ +For example, if the task is to print all contents of elements with type +"valuable" whose attribute "priority" is "1", this function can help: + +
(* Print the character data of every "valuable" element whose "priority"
+   attribute has the value "1". A matching element is printed and not
+   searched any further; every other node is searched recursively. *)
+let rec print_valuable_prio1 n =
+  let ntype = n # node_type in
+  match ntype with
+    T_element "valuable" when n # attribute "priority" = Value "1" ->
+      print_endline "Valuable node with priority 1 found:";
+      print_endline (n # data)
+  | (T_element _ | T_data) ->
+      (* Data nodes are leaves, so for them the iteration does nothing. *)
+      let children = n # sub_nodes in
+      List.iter print_valuable_prio1 children
+  | _ ->
+      (* Other node types do not occur unless the parser is configured
+         differently. *)
+      assert false
+ +You can call this function by: + +
print_valuable_prio1 root
+ +If you like a DSSSL-like style, you can make the function +process_children explicit: + +
(* Same search as before, written in a DSSSL-like style: the recursion into
+   the children is factored out into the local function [process_children]. *)
+let rec print_valuable_prio1 n =
+
+  (* Apply the search to every child of [node]. *)
+  let process_children node =
+    List.iter print_valuable_prio1 (node # sub_nodes)
+  in
+
+  match n # node_type with
+  | T_element "valuable" when n # attribute "priority" = Value "1" ->
+      print_endline "Valuable node with priority 1 found:";
+      print_endline (n # data)
+  | T_element _
+  | T_data ->
+      process_children n
+  | _ ->
+      assert false
+ +So far, O'Caml is now a simple "style-sheet language": You can form a big +"match" expression to distinguish between all significant cases, and provide +different reactions on different conditions. But this technique has +limitations; the "match" expression tends to get larger and larger, and it is +difficult to store intermediate values as there is only one big +recursion. Alternatively, it is also possible to represent the various cases as +classes, and to use dynamic method lookup to find the appropriate class. The +next section explains this technique in detail.

Notes

[1]

Elements may +also contain processing instructions. Unlike other document models, PXP +separates processing instructions from the rest of the text and provides a +second interface to access them (method pinstr). However, +there is a parser option (enable_pinstr_nodes) which changes +the behaviour of the parser such that extra nodes for processing instructions +are included into the tree.

Furthermore, the tree does normally not contain nodes for XML comments; +they are ignored by default. Again, there is an option +(enable_comment_nodes) changing this.

[2]

Due to the typing system it is more or less impossible to +derive recursive classes in O'Caml. To get around this, it is common practice +to put the modifiable or extensible part of recursive objects into parallel +objects.


PrevHomeNext
Using PXPUpClass-based processing of the node tree
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html new file mode 100644 index 000000000..cf3f4737c --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x675.html @@ -0,0 +1,538 @@ +Class-based processing of the node tree
The PXP user's guide
PrevChapter 2. Using PXPNext

2.3. Class-based processing of the node tree

By default, the parsed node tree consists of objects of the same class; this is +a good design as long as you want only to access selected parts of the +document. For complex transformations, it may be better to use different +classes for objects describing different element types.

For example, if the DTD declares the element types a, +b, and c, and if the task is to convert +an arbitrary document into a printable format, the idea is to define for every +element type a separate class that has a method print. The +classes are eltype_a, eltype_b, and +eltype_c, and every class implements +print such that elements of the type corresponding to the +class are converted to the output format.

The parser supports such a design directly. As it is impossible to derive +recursive classes in O'Caml[1], the specialized element classes cannot be formed by +simply inheriting from the built-in classes of the parser and adding methods +for customized functionality. To get around this limitation, every node of the +document tree is represented by two objects, one called +"the node" and containing the recursive definition of the tree, one called "the +extension". Every node object has a reference to the extension, and the +extension has a reference to the node. The advantage of this model is that it +is now possible to customize the extension without affecting the typing +constraints of the recursive node definition.

Every extension must have the three methods clone, +node, and set_node. The method +clone creates a deep copy of the extension object and +returns it; node returns the node object for this extension +object; and set_node is used to tell the extension object +which node is associated with it; this method is automatically called when the +node tree is initialized. The following definition is a good starting point +for these methods; usually clone must be further refined +when instance variables are added to the class: + +

(* Minimal extension class providing the three mandatory methods [clone],
+   [node] and [set_node]. *)
+class custom_extension =
+  object (self)
+
+    (* Node this extension is attached to; [None] until [set_node] is called. *)
+    val mutable node = (None : custom_extension node option)
+
+    (* Copy of this object, made with the object-copy expression. *)
+    method clone = {< >}
+
+    (* Remember [associated_node] as the node of this extension. *)
+    method set_node associated_node =
+      node <- Some associated_node
+
+    (* The attached node; fails with an assertion if none has been set. *)
+    method node =
+      match node with
+      | Some associated_node -> associated_node
+      | None -> assert false
+
+  end
+ +This part of the extension is usually the same for all classes, so it is a good +idea to consider custom_extension as the super-class of the +further class definitions. Continuing the example above, we can define the +element type classes as follows: + +
class virtual custom_extension =
+  object (self)
+    ... clone, node, set_node defined as above ...
+
+    method virtual print : out_channel -> unit
+  end
+
+class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = ...
+  end
+ +The method print can now be implemented for every element +type separately. Note that you get the associated node by invoking + +
self # node
+ +and you get the extension object of a node n by writing + +
n # extension
+ +It is guaranteed that + +
self # node # extension == self
+ +always holds.

Here are sample definitions of the print +methods: + +

(* Extension for <a> elements: prints the converted children enclosed in
+   parentheses. *)
+class eltype_a =
+  object (self)
+    inherit custom_extension
+    method print ch =
+      (* Nodes <a>...</a> are only containers, so printing is delegated to
+         the extension object of every child. *)
+      let print_child child = child # extension # print ch in
+      output_string ch "(";
+      List.iter print_child (self # node # sub_nodes);
+      output_string ch ")"
+  end
+
+(* Extension for <b> elements: prints the value of the CDATA attribute
+   "print", or "<missing>" if the attribute has no value. *)
+class eltype_b =
+  object (self)
+    inherit custom_extension
+    method print ch =
+      let text =
+        match self # node # attribute "print" with
+        | Value s -> s
+        | Implied_value -> "<missing>"
+        | Valuelist _ ->
+            (* not possible because the att is CDATA *)
+            assert false
+      in
+      output_string ch text
+  end
+
+(* Extension for <c> elements: prints the character data of the node as
+   returned by its [data] method. *)
+class eltype_c =
+  object (self)
+    inherit custom_extension
+    method print ch = 
+      (* Print the contents of this element: *)
+      output_string ch (self # node # data)
+  end
+
+(* Extension whose [print] method must never be invoked: calling it fails
+   with an assertion. *)
+class null_extension =
+  object (self)
+    inherit custom_extension
+    method print ch = assert false
+  end

The remaining task is to configure the parser such that these extension classes +are actually used. Here another problem arises: It is not possible to +dynamically select the class of an object to be created. As workaround, +PXP allows the user to specify exemplar objects for +the various element types; instead of creating the nodes of the tree by +applying the new operator the nodes are produced by +duplicating the exemplars. As object duplication preserves the class of the +object, one can create fresh objects of every class for which previously an +exemplar has been registered.

Exemplars are meant as objects without contents, the only interesting thing is +that exemplars are instances of a certain class. The creation of an exemplar +for an element node can be done by: + +

let element_exemplar = new element_impl extension_exemplar
+ +And a data node exemplar is created by: + +
let data_exemplar = new data_impl extension_exemplar
+ +The classes element_impl and data_impl +are defined in the module Pxp_document. The constructors +initialize the fresh objects as empty objects, i.e. without children, without +data contents, and so on. The extension_exemplar is the +initial extension object the exemplars are associated with.

Once the exemplars are created and stored somewhere (e.g. in a hash table), you +can take an exemplar and create a concrete instance (with contents) by +duplicating it. As user of the parser you are normally not concerned with this +as this is part of the internal logic of the parser, but as background knowledge +it is worthwhile to mention that the two methods +create_element and create_data actually +perform the duplication of the exemplar for which they are invoked, +additionally apply modifications to the clone, and finally return the new +object. Moreover, the extension object is copied, too, and the new node object +is associated with the fresh extension object. Note that this is the reason why +every extension object must have a clone method.

The configuration of the set of exemplars is passed to the +parse_document_entity function as third argument. In our +example, this argument can be set up as follows: + +

(* Object specification handed to the parser: data nodes and elements without
+   an entry in the list get a [null_extension]; the element types "a", "b"
+   and "c" get their own extension classes. *)
+let spec =
+  make_spec_from_alist
+    ~data_exemplar:            (new data_impl (new null_extension))
+    ~default_element_exemplar: (new element_impl (new null_extension))
+    ~element_alist:
+       [ "a",  new element_impl (new eltype_a);
+         "b",  new element_impl (new eltype_b);
+         "c",  new element_impl (new eltype_c);
+       ]
+    ()
+ +The ~element_alist function argument defines the mapping +from element types to exemplars as an associative list. The argument +~data_exemplar specifies the exemplar for data nodes, and +the ~default_element_exemplar is used whenever the parser +finds an element type for which the associative list does not define an +exemplar.

The configuration is now complete. You can still use the same parsing +functions, only the initialization is a bit different. For example, call the +parser by: + +

let d = parse_document_entity default_config (from_file "doc.xml") spec
+ +Note that the resulting document d has a usable type; +especially the print method we added is visible. So you can +print your document by + +
d # root # extension # print stdout

This object-oriented approach looks rather complicated; this is mostly caused +by working around some problems of the strict typing system of O'Caml. Some +auxiliary concepts such as extensions were needed, but the practical +consequences are low. In the next section, one of the examples of the +distribution is explained, a converter from readme +documents to HTML.

Notes

[1]

The problem is that the subclass is +usually not a subtype in this case because O'Caml has a contravariant subtyping +rule.


PrevHomeNext
How to parse a document from an applicationUpExample: An HTML backend for the readme +DTD
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html new file mode 100644 index 000000000..674180172 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x738.html @@ -0,0 +1,1036 @@ +Example: An HTML backend for the readme +DTD
The PXP user's guide
PrevChapter 2. Using PXPNext

2.4. Example: An HTML backend for the readme +DTD

The converter from readme documents to HTML +documents follows strictly the approach to define one class per element +type. The HTML code is similar to the readme source, +because of this most elements can be converted in the following way: Given the +input element + +

<e>content</e>
+ +the conversion text is the concatenation of a computed prefix, the recursively +converted content, and a computed suffix.

Only one element type cannot be handled by this scheme: +footnote. Footnotes are collected while they are found in +the input text, and they are printed after the main text has been converted and +printed.

2.4.1. Header

open Pxp_types
+open Pxp_document

2.4.2. Type declarations

(* Two mutually recursive object types: a footnote printer needs the store to
+   print itself, and the store holds footnote printers. *)
+class type footnote_printer =
+  object
+    (* Print this footnote as HTML to the given channel. *)
+    method footnote_to_html : store_type -> out_channel -> unit
+  end
+
+and store_type =
+  object
+    (* Register a footnote; the result is the number of the footnote. *)
+    method alloc_footnote : footnote_printer -> int
+    (* Print the registered footnotes to the given channel. *)
+    method print_footnotes : out_channel -> unit
+  end
+;;

2.4.3. Class store

The store is a container for footnotes. You can add a +footnote by invoking alloc_footnote; the argument is an +object of the class footnote_printer, the method returns the +number of the footnote. The interesting property of a footnote is that it can +be converted to HTML, so a footnote_printer is an object +with a method footnote_to_html. The class +footnote which is defined below has a compatible method +footnote_to_html such that objects created from it can be +used as footnote_printers.

The other method, print_footnotes, prints the footnotes as a +definition list, and is typically invoked after the main material of the page +has already been printed. Every item of the list is printed by +footnote_to_html.

(* Container that collects the footnotes found during the conversion and
+   prints them as a definition list afterwards. *)
+class store =
+  object (self)
+
+    (* Number returned by the next call of [alloc_footnote]. *)
+    val mutable next_footnote_number = 1
+    (* Registered footnotes in allocation order, paired with their numbers. *)
+    val mutable footnotes = ( [] : (int * footnote_printer) list )
+
+    (* Append [n] to the footnotes and return the number assigned to it. *)
+    method alloc_footnote n =
+      let number = next_footnote_number in
+      footnotes <- footnotes @ [ number, n ];
+      next_footnote_number <- number + 1;
+      number
+
+    (* Print a rule and the list of footnotes; nothing is printed when no
+       footnote has been registered. *)
+    method print_footnotes ch =
+      let print_one (_, printer) =
+        printer # footnote_to_html (self : #store_type :> store_type) ch
+      in
+      match footnotes with
+      | [] -> ()
+      | registered ->
+          output_string ch "<hr align=left noshade=noshade width=\"30%\">\n";
+          output_string ch "<dl>\n";
+          List.iter print_one registered;
+          output_string ch "</dl>\n"
+
+  end
+;;

2.4.4. Function escape_html

This function converts the characters <, >, &, and " to their HTML +representation. For example, +escape_html "<>" = "&lt;&gt;". Other +characters are left unchanged. + +

(* Return a copy of [s] in which the characters <, >, & and " are replaced by
+   the corresponding HTML entities; all other characters are kept. *)
+let escape_html s =
+  let special_chars = Str.regexp "<\\|>\\|&\\|\"" in
+  let entity_for subject =
+    match Str.matched_string subject with
+    | "<" -> "&lt;"
+    | ">" -> "&gt;"
+    | "&" -> "&amp;"
+    | "\"" -> "&quot;"
+    | _ -> assert false   (* the regexp matches nothing else *)
+  in
+  Str.global_substitute special_chars entity_for s
+;;

2.4.5. Virtual class shared

This virtual class is the abstract superclass of the extension classes shown +below. It defines the standard methods clone, +node, and set_node, and declares the type +of the virtual method to_html. This method recursively +traverses the whole element tree, and prints the converted HTML code to the +output channel passed as second argument. The first argument is the reference +to the global store object which collects the footnotes. + +

(* Abstract superclass of the extension classes of the converter: it supplies
+   [clone], [node] and [set_node] and declares the virtual method [to_html]. *)
+class virtual shared =
+  object (self)
+
+    (* --- default_ext --- *)
+
+    (* Node this extension is attached to; [None] until [set_node] is called. *)
+    val mutable node = (None : shared node option)
+
+    method clone = {< >} 
+    (* The attached node; fails with an assertion if none has been set. *)
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+    (* --- virtual --- *)
+
+    (* Convert the node to HTML: the first argument is the footnote store,
+       the second the output channel. *)
+    method virtual to_html : store -> out_channel -> unit
+
+  end
+;;

2.4.6. Class only_data

This class defines to_html such that the character data of +the current node is converted to HTML. Note that self is an +extension object, self # node is the node object, and +self # node # data returns the character data of the node. + +

(* Extension that converts a node by printing its character data with the
+   HTML-special characters escaped. *)
+class only_data =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let text = self # node # data in
+      output_string ch (escape_html text)
+  end
+;;

2.4.7. Class readme

This class converts elements of type readme to HTML. Such an +element is (by definition) always the root element of the document. First, the +HTML header is printed; the title attribute of the element +determines the title of the HTML page. Some aspects of the HTML page can be +configured by setting certain parameter entities, for example the background +color, the text color, and link colors. After the header, the +body tag, and the headline have been printed, the contents +of the page are converted by invoking to_html on all +children of the current node (which is the root node). Then, the footnotes are +appended to this by telling the global store object to print +the footnotes. Finally, the end tags of the HTML pages are printed.

This class is an example of how to access the value of an attribute: The value is +determined by invoking self # node # attribute "title". As +this attribute has been declared as CDATA and as being required, the value +always has the form Value s where s is the +string value of the attribute.

You can also see how entity contents can be accessed. A parameter entity object +can be looked up by self # node # dtd # par_entity "name", +and by invoking replacement_text the value of the entity +is returned after inner parameter and character entities have been +processed. Note that you must use gen_entity instead of +par_entity to access general entities.

(* Extension for the root element <readme>: prints the whole HTML page, i.e.
+   the page header, the converted children of the root node, the collected
+   footnotes, and the trailer. *)
+class readme =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* Replacement text of the parameter entity [name] of the DTD;
+         [default] is used instead when the lookup raises WF_error. *)
+      let entity_text name default =
+        try
+          fst ((self # node # dtd # par_entity name) # replacement_text)
+        with WF_error _ -> default
+      in
+      (* output header *)
+      output_string
+        ch "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">";
+      output_string
+        ch "<!-- WARNING! This is a generated file, do not edit! -->\n";
+      let title =
+        match self # node # attribute "title" with
+            Value s -> s
+          | _ -> assert false
+      in
+      let html_header     = entity_text "readme:html:header" "" in
+      let html_trailer    = entity_text "readme:html:trailer" "" in
+      let html_bgcolor    = entity_text "readme:html:bgcolor" "white" in
+      let html_textcolor  = entity_text "readme:html:textcolor" "" in
+      let html_alinkcolor = entity_text "readme:html:alinkcolor" "" in
+      let html_vlinkcolor = entity_text "readme:html:vlinkcolor" "" in
+      let html_linkcolor  = entity_text "readme:html:linkcolor" "" in
+      let html_background = entity_text "readme:html:background" "" in
+
+      (* The HTML element that contains the title is <head>; <header> is
+         not an HTML 3.2 element. *)
+      output_string ch "<html><head><title>\n";
+      output_string ch (escape_html title);
+      output_string ch "</title></head>\n";
+      output_string ch "<body ";
+      (* Only attributes with a non-empty value are printed. *)
+      List.iter
+        (fun (name,value) ->
+           if value <> "" then
+             output_string ch (name ^ "=\"" ^ escape_html value ^ "\" "))
+        [ "bgcolor",    html_bgcolor;
+          "background", html_background;
+          "text",       html_textcolor;
+          "link",       html_linkcolor;
+          "alink",      html_alinkcolor;
+          "vlink",      html_vlinkcolor;
+        ];
+      output_string ch ">\n";
+      (* Header and trailer are inserted verbatim, without escaping. *)
+      output_string ch html_header;
+      output_string ch "<h1>";
+      output_string ch (escape_html title);
+      output_string ch "</h1>\n";
+      (* process main content: *)
+      List.iter
+        (fun n -> n # extension # to_html store ch)
+        (self # node # sub_nodes);
+      (* now process footnotes *)
+      store # print_footnotes ch;
+      (* trailer *)
+      output_string ch html_trailer;
+      output_string ch "</html>\n";
+
+  end
+;;

2.4.8. Classes section, sect1, +sect2, and sect3

As the conversion process is very similar, the conversion classes of the three +section levels are derived from the more general section +class. The HTML code of the section levels only differs in the type of the +headline, and because of this the classes describing the section levels can be +computed by replacing the class argument the_tag of +section by the HTML name of the headline tag.

Section elements are converted to HTML by printing a headline and then +converting the contents of the element recursively. More precisely, the first +sub-element is always a title element, and the other +elements are the contents of the section. This structure is declared in the +DTD, and it is guaranteed that the document matches the DTD. Because of this +the title node can be separated from the rest without any checks.

Both the title node, and the body nodes are then converted to HTML by calling +to_html on them.

(* Generic converter for section elements: the first child is printed as a
+   headline inside the HTML tag [the_tag], the remaining children follow. *)
+class section the_tag =
+  object (self)
+    inherit shared
+
+    (* Name of the HTML headline tag. *)
+    val tag = the_tag
+
+    method to_html store ch =
+      let convert child = child # extension # to_html store ch in
+      match self # node # sub_nodes with
+      | [] ->
+          (* A section without a first child is not expected. *)
+          assert false
+      | title_node :: body_nodes ->
+          output_string ch ("<" ^ tag ^ ">\n");
+          convert title_node;
+          output_string ch ("\n</" ^ tag ^ ">");
+          List.iter convert body_nodes
+  end
+;;
+
+class sect1 = section "h1";;
+class sect2 = section "h3";;
+class sect3 = section "h4";;

2.4.9. Classes map_tag, p, +em, ul, li

Several element types are converted to HTML by simply mapping them to +corresponding HTML element types. The class map_tag +implements this, and the class argument the_target_tag +determines the tag name to map to. The output consists of the start tag, the +recursively converted inner elements, and the end tag. + +

(* Converter that maps an element to the HTML element [the_target_tag]: the
+   output is the start tag, the converted children, and the end tag. *)
+class map_tag the_target_tag =
+  object (self)
+    inherit shared
+
+    (* Name of the HTML tag to print. *)
+    val target_tag = the_target_tag
+
+    method to_html store ch =
+      let start_tag = "<" ^ target_tag ^ ">\n" in
+      let end_tag = "\n</" ^ target_tag ^ ">" in
+      output_string ch start_tag;
+      List.iter
+        (fun child -> child # extension # to_html store ch)
+        (self # node # sub_nodes);
+      output_string ch end_tag
+  end
+;;
+
+class p = map_tag "p";;
+class em = map_tag "b";;
+class ul = map_tag "ul";;
+class li = map_tag "li";;

2.4.10. Class br

Elements of type br are mapped to the same HTML type. Note +that HTML forbids the end tag of br. + +

(* Converter for <br> elements: prints the start tag only (no end tag) and
+   then converts the children, if any. *)
+class br =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      output_string ch "<br>\n";
+      (* Children are converted after the tag, not enclosed by it. *)
+      List.iter
+	(fun n -> n # extension # to_html store ch)
+	(self # node # sub_nodes);
+  end
+;;

2.4.11. Class code

The code type is converted to a pre +section (preformatted text). As the meaning of tabs is unspecified in HTML, +tabs are expanded to spaces. + +

(* Converter for <code> elements: the character data of the node is printed
+   as a preformatted section, with tabs expanded to spaces. *)
+class code =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      let data = self # node # data in
+      (* Convert tabs: every tab is replaced by the number of spaces that
+         leads to the next multiple of 8 columns; a newline resets the
+         column. The text is accumulated in a buffer by a plain loop, so the
+         cost is linear in the length of the data and the stack does not
+         grow with it. *)
+      let expanded = Buffer.create (String.length data) in
+      let column = ref 0 in
+      for i = 0 to String.length data - 1 do
+        match data.[i] with
+        | '\t' ->
+            let n = 8 - (!column mod 8) in
+            Buffer.add_string expanded (String.make n ' ');
+            column := !column + n
+        | '\n' ->
+            Buffer.add_char expanded '\n';
+            column := 0
+        | c ->
+            Buffer.add_char expanded c;
+            incr column
+      done;
+      output_string ch "<p><pre>";
+      (* Escaping happens after the expansion, so the column count is based
+         on the original characters. *)
+      output_string ch (escape_html (Buffer.contents expanded));
+      output_string ch "</pre></p>";
+
+  end
+;;

2.4.12. Class a

Hyperlinks, expressed by the a element type, are converted +to the HTML a type. If the target of the hyperlink is given +by href, the URL of this attribute can be used +directly. Alternatively, the target can be given by +readmeref in which case the ".html" suffix must be added to +the file name.

Note that within a only #PCDATA is allowed, so the contents +can be converted directly by applying escape_html to the +character data contents. + +

(* Converter for hyperlinks: prints an HTML <a> element whose target comes
+   from the attribute "href" or, if that has no value, from "readmeref" with
+   ".html" appended. *)
+class a =
+  object (self)
+    inherit shared
+
+    method to_html store ch =
+      (* The start of the tag is printed before the attributes are read. *)
+      output_string ch "<a ";
+      let href =
+	match self # node # attribute "href" with
+	    Value v -> escape_html v
+	  | Valuelist _ -> assert false
+	  | Implied_value ->
+	      (* No "href" value: fall back to "readmeref". *)
+	      begin match self # node # attribute "readmeref" with
+		  Value v -> escape_html v ^ ".html"
+		| Valuelist _ -> assert false
+		| Implied_value ->
+		    ""
+	      end
+      in
+      (* An empty target means that no href attribute is printed at all. *)
+      if href <> "" then
+	output_string ch ("href=\""  ^ href ^ "\"");
+      output_string ch ">";
+      (* The link text is the escaped character data of the element. *)
+      output_string ch (escape_html (self # node # data));
+      output_string ch "</a>";
+	
+  end
+;;

2.4.13. Class footnote

The footnote class has two methods: +to_html to convert the footnote reference to HTML, and +footnote_to_html to convert the footnote text itself.

The footnote reference is converted to a local hyperlink; more precisely, to +two anchor tags which are connected with each other. The text anchor points to +the footnote anchor, and the footnote anchor points to the text anchor.

The footnote must be allocated in the store object. By +allocating the footnote, you get the number of the footnote, and the text of +the footnote is stored until the end of the HTML page is reached when the +footnotes can be printed. The to_html method stores simply +the object itself, such that the footnote_to_html method is +invoked on the same object that encountered the footnote.

The to_html only allocates the footnote, and prints the +reference anchor, but it does not print nor convert the contents of the +note. This is deferred until the footnotes actually get printed, i.e. the +recursive call of to_html on the sub nodes is done by +footnote_to_html.

Note that this technique does not work if you make another footnote within a +footnote; the second footnote gets allocated but not printed.

class footnote =
+  object (self)
+    inherit shared
+
+    val mutable footnote_number = 0
+
+    method to_html store ch =
+      let number = 
+	store # alloc_footnote (self : #shared :> footnote_printer) in
+      let foot_anchor = 
+	"footnote" ^ string_of_int number in
+      let text_anchor =
+	"textnote" ^ string_of_int number in
+      footnote_number <- number;
+      output_string ch ( "<a name=\"" ^ text_anchor ^ "\" href=\"#" ^ 
+			 foot_anchor ^ "\">[" ^ string_of_int number ^ 
+			 "]</a>" )
+
+    method footnote_to_html store ch =
+      (* prerequisite: we are in a definition list <dl>...</dl> *)
+      let foot_anchor = 
+	"footnote" ^ string_of_int footnote_number in
+      let text_anchor =
+	"textnote" ^ string_of_int footnote_number in
+      output_string ch ("<dt><a name=\"" ^ foot_anchor ^ "\" href=\"#" ^ 
+			text_anchor ^ "\">[" ^ string_of_int footnote_number ^ 
+			"]</a></dt>\n<dd>");
+      List.iter
+	(fun n -> n # extension # to_html store ch)
+	(self # node # sub_nodes);
+      output_string ch ("\n</dd>")
+ 
+  end
+;;

2.4.14. The specification of the document model

This code sets up the hash table that connects element types with the exemplars +of the extension classes that convert the elements to HTML. + +

open Pxp_yacc
+
+let tag_map =
+  make_spec_from_alist
+    ~data_exemplar:(new data_impl (new only_data))
+    ~default_element_exemplar:(new element_impl (new no_markup))
+    ~element_alist:
+      [ "readme", (new element_impl (new readme));
+	"sect1",  (new element_impl (new sect1));
+	"sect2",  (new element_impl (new sect2));
+	"sect3",  (new element_impl (new sect3));
+	"title",  (new element_impl (new no_markup));
+	"p",      (new element_impl (new p));
+	"br",     (new element_impl (new br));
+	"code",   (new element_impl (new code));
+	"em",     (new element_impl (new em));
+	"ul",     (new element_impl (new ul));
+	"li",     (new element_impl (new li));
+	"footnote", (new element_impl (new footnote : #shared :> shared));
+	"a",      (new element_impl (new a));
+      ]
+    ()
+;;


PrevHomeNext
Class-based processing of the node treeUpThe objects representing the document
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html b/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html new file mode 100644 index 000000000..cf177f88e --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/html/x939.html @@ -0,0 +1,2337 @@ +The class type node
The PXP user's guide
PrevChapter 3. The objects representing the documentNext

3.2. The class type node

From Pxp_document: + +

type node_type =
+  T_data
+| T_element of string
+| T_super_root
+| T_pinstr of string
+| T_comment
+and some other, reserved types
+;;
+
+class type [ 'ext ] node =
+  object ('self)
+    constraint 'ext = 'ext node #extension
+
+    (* General observers *)
+
+    method extension : 'ext
+    method dtd : dtd
+    method parent : 'ext node
+    method root : 'ext node
+    method sub_nodes : 'ext node list
+    method iter_nodes : ('ext node -> unit) -> unit
+    method iter_nodes_sibl : 
+           ('ext node option -> 'ext node -> 'ext node option -> unit) -> unit
+    method node_type : node_type
+    method encoding : Pxp_types.rep_encoding
+    method data : string
+    method position : (string * int * int)
+    method comment : string option
+    method pinstr : string -> proc_instruction list
+    method pinstr_names : string list
+    method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit
+
+    (* Attribute observers *)
+
+    method attribute : string -> Pxp_types.att_value
+    method required_string_attribute : string -> string
+    method optional_string_attribute : string -> string option
+    method required_list_attribute : string -> string list
+    method optional_list_attribute : string -> string list
+    method attribute_names : string list
+    method attribute_type : string -> Pxp_types.att_type
+    method attributes : (string * Pxp_types.att_value) list
+    method id_attribute_name : string
+    method id_attribute_value : string
+    method idref_attribute_names : string list
+
+    (* Modifying methods *)
+
+    method add_node : ?force:bool -> 'ext node -> unit
+    method add_pinstr : proc_instruction -> unit
+    method delete : unit
+    method set_nodes : 'ext node list -> unit
+    method quick_set_attributes : (string * Pxp_types.att_value) list -> unit
+    method set_comment : string option -> unit
+
+    (* Cloning methods *)
+
+    method orphaned_clone : 'self
+    method orphaned_flat_clone : 'self
+    method create_element : 
+              ?position:(string * int * int) ->
+              dtd -> node_type -> (string * string) list ->
+                  'ext node
+    method create_data : dtd -> string -> 'ext node
+    method keep_always_whitespace_mode : unit
+
+    (* Validating methods *)
+
+    method local_validate : ?use_dfa:bool -> unit -> unit
+
+    (* ... Internal methods are undocumented. *)
+
+  end
+;;
+ +In the module Pxp_types you can find another type +definition that is important in this context: + +
type Pxp_types.att_value =
+    Value     of string
+  | Valuelist of string list
+  | Implied_value
+;;

3.2.1. The structure of document trees

A node represents either an element or a character data section. There are two +classes implementing the two aspects of nodes: element_impl +and data_impl. The latter class does not implement all +methods because some methods do not make sense for data nodes.

(Note: PXP also supports a mode which forces that processing instructions and +comments are represented as nodes of the document tree. However, these nodes +are instances of element_impl with node types +T_pinstr and T_comment, +respectively. This mode must be explicitly configured; the basic representation +knows only element and data nodes.)

The following figure +(A tree with element nodes, data nodes, and attributes) shows an example of how +a tree is constructed from element and data nodes. The circular areas +represent element nodes whereas the ovals denote data nodes. Only elements +may have subnodes; data nodes are always leaves of the tree. The subnodes +of an element can be either element or data nodes; in both cases the O'Caml +objects storing the nodes have the class type node.

Attributes (the clouds in the picture) are not directly +integrated into the tree; there is always an extra link to the attribute +list. This is also true for processing instructions (not shown in the +picture). This means that there are separated access methods for attributes and +processing instructions.

Figure 3-1. A tree with element nodes, data nodes, and attributes

Only elements, data sections, attributes and processing +instructions (and comments, if configured) can, directly or indirectly, occur +in the document tree. It is impossible to add entity references to the tree; if +the parser finds such a reference, not the reference as such but the referenced +text (i.e. the tree representing the structured text) is included in the +tree.

Note that the parser collapses as much data material into one +data node as possible such that there are normally never two adjacent data +nodes. This invariant is enforced even if data material is included by entity +references or CDATA sections, or if a data sequence is interrupted by +comments. So a &amp; b <!-- comment --> c <![CDATA[ +<> d]]> is represented by only one data node, for +instance. However, you can create document trees manually which break this +invariant; it is only the way the parser forms the tree.

Figure 3-2. Nodes are doubly linked trees

The node tree has links in both directions: Every node has a link to its parent +(if any), and it has links to the subnodes (see +figure Nodes are doubly linked trees). Obviously, +this doubly-linked structure simplifies the navigation in the tree; but it +also has some consequences for the possible operations on trees.

Because every node must have at most one parent node, +operations are illegal if they violate this condition. The following figure +(A node can only be added if it is a root) shows on the left side +that node y is added to x as new subnode +which is allowed because y does not have a parent yet. The +right side of the picture illustrates what would happen if y +had a parent node; this is illegal because y would have two +parents after the operation.

Figure 3-3. A node can only be added if it is a root

The "delete" operation simply removes the links between two nodes. In the +picture (A deleted node becomes the root of the subtree) the node +x is deleted from the list of subnodes of +y. After that, x becomes the root of the +subtree starting at this node.

Figure 3-4. A deleted node becomes the root of the subtree

It is also possible to make a clone of a subtree; illustrated in +The clone of a subtree. In this case, the +clone is a copy of the original subtree except that it is no longer a +subnode. Because cloning never keeps the connection to the parent, the clones +are called orphaned.

Figure 3-5. The clone of a subtree

3.2.2. The methods of the class type node

General observers + .

  • extension: The reference to the extension object which +belongs to this node (see ...).

  • dtd: Returns a reference to the global DTD. All nodes +of a tree must share the same DTD.

  • parent: Get the father node. Raises +Not_found in the case the node does not have a +parent, i.e. the node is the root.

  • root: Gets the reference to the root node of the tree. +Every node is contained in a tree with a root, so this method always +succeeds. Note that this method searches the root, +which costs time proportional to the length of the path to the root.

  • sub_nodes: Returns references to the children. The returned +list reflects the order of the children. For data nodes, this method returns +the empty list.

  • iter_nodes f: Iterates over the children, and calls +f for every child in turn.

  • iter_nodes_sibl f: Iterates over the children, and calls +f for every child in turn. f gets as +arguments the previous node, the current node, and the next node.

  • node_type: Returns either T_data which +means that the node is a data node, or T_element n +which means that the node is an element of type n. +If configured, possible node types are also T_pinstr t +indicating that the node represents a processing instruction with target +t, and T_comment in which case the node +is a comment.

  • encoding: Returns the encoding of the strings.

  • data: Returns the character data of this node and all +children, concatenated as one string. The encoding of the string is what +the method encoding returns. +- For data nodes, this method simply returns the represented characters. +For elements, the meaning of the method has been extended such that it +returns something useful, i.e. the effectively contained characters, without +markup. (For T_pinstr and T_comment +nodes, the method returns the empty string.)

  • position: If configured, this method returns the position of +the element as triple (entity, line, byteposition). For data nodes, the +position is not stored. If the position is not available the triple +"?", 0, 0 is returned.

  • comment: Returns Some text for comment +nodes, and None for other nodes. The text +is everything between the comment delimiters <!-- and +-->.

  • pinstr n: Returns all processing instructions that are +directly contained in this element and that have a target +specification of n. The target is the first word after +the <?.

  • pinstr_names: Returns the list of all targets of processing +instructions directly contained in this element.

  • write s enc: Prints the node and all subnodes to the passed +output stream as valid XML text, using the passed external encoding.

+

Attribute observers + .

  • attribute n: Returns the value of the attribute with name +n. This method returns a value for every declared +attribute, and it raises Not_found for any undeclared +attribute. Note that it even returns a value if the attribute is actually +missing but is declared as #IMPLIED or has a default +value. - Possible values are: +

    • Implied_value: The attribute has been declared with the +keyword #IMPLIED, and the attribute is missing in the +attribute list of this element.

    • Value s: The attribute has been declared as type +CDATA, as ID, as +IDREF, as ENTITY, or as +NMTOKEN, or as enumeration or notation, and one of the two +conditions holds: (1) The attribute value is present in the attribute list in +which case the value is returned in the string s. (2) The +attribute has been omitted, and the DTD declared the attribute with a default +value. The default value is returned in s. +- Summarized, Value s is returned for non-implied, non-list +attribute values.

    • Valuelist l: The attribute has been declared as type +IDREFS, as ENTITIES, or +as NMTOKENS, and one of the two conditions holds: (1) The +attribute value is present in the attribute list in which case the +space-separated tokens of the value are returned in the string list +l. (2) The attribute has been omitted, and the DTD declared +the attribute with a default value. The default value is returned in +l. +- Summarized, Valuelist l is returned for all list-type +attribute values.

    + +Note that before the attribute value is returned, the value is normalized. This +means that newlines are converted to spaces, and that references to character +entities (i.e. &#n;) and +general entities +(i.e. &name;) are expanded; +if necessary, expansion is performed recursively.

    In well-formedness mode, there is no DTD which could declare an +attribute. Because of this, every occurring attribute is considered as a CDATA +attribute.

  • required_string_attribute n: returns the Value attribute +called n, or the Valuelist attribute as a string where the list elements +are separated by spaces. If the attribute value is implied, or if the +attribute does not exist, the method will fail. - This method is convenient +if you expect a non-implied and non-list attribute value.

  • optional_string_attribute n: returns the Value attribute +called n, or the Valuelist attribute as a string where the list elements +are separated by spaces. If the attribute value is implied, or if the +attribute does not exist, the method returns None. - This method is +convenient if you expect a non-list attribute value including the implied +value.

  • required_list_attribute n: returns the Valuelist attribute +called n, or the Value attribute as a list with a single element. +If the attribute value is implied, or if the +attribute does not exist, the method will fail. - This method is +convenient if you expect a list attribute value.

  • optional_list_attribute n: returns the Valuelist attribute +called n, or the Value attribute as a list with a single element. +If the attribute value is implied, or if the +attribute does not exist, an empty list will be returned. - This method +is convenient if you expect a list attribute value or the implied value.

  • attribute_names: returns the list of all attribute names of +this element. As this is a validating parser, this list is equal to the +list of declared attributes.

  • attribute_type n: returns the type of the attribute called +n. See the module Pxp_types for a +description of the encoding of the types.

  • attributes: returns the list of pairs of names and values +for all attributes of +this element.

  • id_attribute_name: returns the name of the attribute that is +declared with type ID. There is at most one such attribute. The method raises +Not_found if there is no declared ID attribute for the +element type.

  • id_attribute_value: returns the value of the attribute that +is declared with type ID. There is at most one such attribute. The method raises +Not_found if there is no declared ID attribute for the +element type.

  • idref_attribute_names: returns the list of attribute names +that are declared as IDREF or IDREFS.

+

Modifying methods + . The following methods are only defined for element nodes (more exactly: +the methods are defined for data nodes, too, but fail always). + +

  • add_node sn: Adds sub node sn to the list +of children. This operation is illustrated in the picture +A node can only be added if it is a root. This method expects that +sn is a root, and it requires that sn and +the current object share the same DTD.

    Because add_node is the method the parser itself uses +to add new nodes to the tree, it performs by default some simple validation +checks: If the content model is a regular expression, it is not allowed to add +data nodes to this node unless the new nodes consist only of whitespace. In +this case, the new data nodes are silently dropped (you can change this by +invoking keep_always_whitespace_mode).

    If the document is flagged as stand-alone, these data nodes only +containing whitespace are even forbidden if the element declaration is +contained in an external entity. This case is detected and rejected.

    If the content model is EMPTY, it is not allowed to +add any data node unless the data node is empty. In this case, the new data +node is silently dropped.

    These checks only apply if there is a DTD. In well-formedness mode, it is +assumed that every element is declared with content model +ANY which prohibits any validation check. Furthermore, you +can turn these checks off by passing ~force:true as first +argument.

  • add_pinstr pi: Adds the processing instruction +pi to the list of processing instructions.

  • delete: Deletes this node from the tree. After this +operation, this node is no longer the child of the former father node; and the +node loses the connection to the father as well. This operation is illustrated +by the figure A deleted node becomes the root of the subtree.

  • set_nodes nl: Sets the list of children to +nl. It is required that every member of nl +is a root, and that all members and the current object share the same DTD. +Unlike add_node, no validation checks are performed.

  • quick_set_attributes atts: sets the attributes of this +element to atts. It is not checked +whether atts matches the DTD or not; it is up to the +caller of this method to ensure this. (This method may be useful to transform +the attribute values, i.e. apply a mapping to every attribute.)

  • set_comment text: This method is only applicable to +T_comment nodes; it sets the comment text contained by such +nodes.

Cloning methods + .

  • orphaned_clone: Returns a clone of the node and the complete +tree below this node (deep clone). The clone does not have a parent (i.e. the +reference to the parent node is not cloned). While +copying the subtree, strings are skipped; it is likely that the original tree +and the copy tree share strings. Extension objects are cloned by invoking +the clone method on the original objects; how much of +the extension objects is cloned depends on the implementation of this method.

    This operation is illustrated by the figure +The clone of a subtree.

  • orphaned_flat_clone: Returns a clone of the node, +but sets the list of sub nodes to [], i.e. the sub nodes are not cloned.

  • +create_element dtd nt al: Returns a flat copy of this node +(which must be an element) with the following modifications: The DTD is set to +dtd; the node type is set to nt, and the +new attribute list is set to al (given as list of +(name,value) pairs). The copy does not have children nor a parent. It does not +contain processing instructions. See +the example below.

    Note that you can specify the position of the new node +by the optional argument ~position.

  • +create_data dtd cdata: Returns a flat copy of this node +(which must be a data node) with the following modifications: The DTD is set to +dtd; the node type is set to T_data; the +attribute list is empty (data nodes never have attributes); the list of +children and PIs is empty, too (same reason). The new node does not have a +parent. The value cdata is the new character content of the +node. See +the example below.

  • keep_always_whitespace_mode: Even data nodes which are +normally dropped because they only contain ignorable whitespace, can be added to +this node once this mode is turned on. (This mode is useful to produce +canonical XML.)

Validating methods + . There is one method which locally validates the node, i.e. checks whether the +subnodes match the content model of this node. + +

  • local_validate: Checks that this node conforms to the +DTD by comparing the type of the subnodes with the content model for this +node. (Applications need not call this method unless they add new nodes +themselves to the tree.)

3.2.3. The class element_impl

This class is an implementation of node which +realizes element nodes: + +

class [ 'ext ] element_impl : 'ext -> [ 'ext ] node

Constructor. You can create a new instance by + +

new element_impl extension_object
+ +which creates a special form of empty element which already contains a +reference to the extension_object, but is +otherwise empty. This special form is called an +exemplar. The purpose of exemplars is that they serve as +patterns that can be duplicated and filled with data. The method +create_element is designed to perform this action.

Example. First, create an exemplar by + +

let exemplar_ext = ... in
+let exemplar     = new element_impl exemplar_ext in
+ +The exemplar is not used in node trees, but only as +a pattern when the element nodes are created: + +
let element = exemplar # create_element dtd (T_element name) attlist 
+ +The element is a copy of exemplar +(even the extension exemplar_ext has been copied) +which ensures that element and its extension are objects +of the same class as the exemplars; note that you need not pass a +class name or other meta information. The copy is initially connected +with the dtd, it gets a node type, and the attribute list +is filled. The element is now fully functional; it can +be added to another element as child, and it can contain references to +subnodes.

3.2.4. The class data_impl

This class is an implementation of node which +should be used for all character data nodes: + +

class [ 'ext ] data_impl : 'ext -> [ 'ext ] node

Constructor. You can create a new instance by + +

new data_impl extension_object
+ +which creates an empty exemplar node which is connected to +extension_object. The node does not contain a +reference to any DTD, and because of this it cannot be added to node trees.

To get a fully working data node, apply the method +create_data to the exemplar (see example).

Example. First, create an exemplar by + +

let exemplar_ext = ... in
+let exemplar     = new data_impl exemplar_ext in
+ +The exemplar is not used in node trees, but only as +a pattern when the data nodes are created: + +
let data_node = exemplar # create_data dtd "The characters contained in the data node" 
+ +The data_node is a copy of exemplar. +The copy is initially connected +with the dtd, and it is filled with character material. +The data_node is now fully functional; it can +be added to an element as child.

3.2.5. The type spec

The type spec defines a way to handle the details of +creating nodes from exemplars. + +

type 'ext spec
+constraint 'ext = 'ext node #extension
+
+val make_spec_from_mapping :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_mapping : (string, 'ext node) Hashtbl.t ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_mapping: (string, 'ext node) Hashtbl.t -> 
+      unit -> 
+        'ext spec
+
+val make_spec_from_alist :
+      ?super_root_exemplar : 'ext node ->
+      ?comment_exemplar : 'ext node ->
+      ?default_pinstr_exemplar : 'ext node ->
+      ?pinstr_alist : (string * 'ext node) list ->
+      data_exemplar: 'ext node ->
+      default_element_exemplar: 'ext node ->
+      element_alist: (string * 'ext node) list -> 
+      unit -> 
+        'ext spec
+ +The two functions make_spec_from_mapping and +make_spec_from_alist create spec +values. Both functions are functionally equivalent and the only difference is +that the first function prefers hashtables and the latter associative lists to +describe mappings from names to exemplars.

You can specify exemplars for the various kinds of nodes that need to be +generated when an XML document is parsed: + +

  • ~super_root_exemplar: This exemplar +is used to create the super root. This special node is only created if the +corresponding configuration option has been selected; it is the parent node of +the root node which may be convenient if every working node must have a parent.

  • ~comment_exemplar: This exemplar is +used when a comment node must be created. Note that such nodes are only created +if the corresponding configuration option is "on".

  • ~default_pinstr_exemplar: If a node +for a processing instruction must be created, and the instruction is not listed +in the table passed by ~pinstr_mapping or +~pinstr_alist, this exemplar is used. +Again the configuration option must be "on" in order to create such nodes at +all.

  • ~pinstr_mapping or +~pinstr_alist: Map the target names of processing +instructions to exemplars. These mappings are only used when nodes for +processing instructions are created.

  • ~data_exemplar: The exemplar for +ordinary data nodes.

  • ~default_element_exemplar: This +exemplar is used if an element node must be created, but the element type +cannot be found in the tables element_mapping or +element_alist.

  • ~element_mapping or +~element_alist: Map the element types to exemplars. These +mappings are used to create element nodes.

+ +In most cases, you only want to create spec values to pass +them to the parser functions found in Pxp_yacc. However, it +might be useful to apply spec values directly.

The following functions create various types of nodes by selecting the +corresponding exemplar from the passed spec value, and by +calling create_element or create_data on +the exemplar. + +

val create_data_node : 
+      'ext spec -> 
+      dtd -> 
+      (* data material: *) string -> 
+          'ext node
+
+val create_element_node : 
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      (* element type: *) string -> 
+      (* attributes: *) (string * string) list -> 
+          'ext node
+
+val create_super_root_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+       dtd -> 
+           'ext node
+
+val create_comment_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      (* comment text: *) string -> 
+          'ext node
+
+val create_pinstr_node :
+      ?position:(string * int * int) ->
+      'ext spec -> 
+      dtd -> 
+      proc_instruction -> 
+          'ext node

3.2.6. Examples

Building trees. Here is the piece of code that creates the tree of +the figure A tree with element nodes, data nodes, and attributes. The extension +object and the DTD are beyond the scope of this example. + +

let exemplar_ext = ... (* some extension *) in
+let dtd = ... (* some DTD *) in
+
+let element_exemplar = new element_impl exemplar_ext in
+let data_exemplar    = new data_impl    exemplar_ext in
+
+let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"]
+and b1 = element_exemplar # create_element dtd (T_element "b") []
+and c1 = element_exemplar # create_element dtd (T_element "c") []
+and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"]
+in
+
+let cherries = data_exemplar # create_data dtd "Cherries" in
+let orange   = data_exemplar # create_data dtd "An orange" in
+
+a1 # add_node b1;
+a1 # add_node c1;
+b1 # add_node a2;
+b1 # add_node cherries;
+a2 # add_node orange;
+ +Alternatively, the last block of statements could also be written as: + +
a1 # set_nodes [b1; c1];
+b1 # set_nodes [a2; cherries];
+a2 # set_nodes [orange];
+ +The root of the tree is a1, i.e. it is true that + +
x # root == a1
+ +for every x from { a1, a2, +b1, c1, cherries, +orange }.

Furthermore, the following properties hold: + +

  a1 # attribute "att" = Value "apple"
+& a2 # attribute "att" = Value "orange"
+
+& cherries # data = "Cherries"
+&   orange # data = "An orange"
+&       a1 # data = "CherriesAn orange"
+
+&       a1 # node_type = T_element "a"
+&       a2 # node_type = T_element "a"
+&       b1 # node_type = T_element "b"
+&       c1 # node_type = T_element "c"
+& cherries # node_type = T_data
+&   orange # node_type = T_data
+
+&       a1 # sub_nodes = [ b1; c1 ]
+&       a2 # sub_nodes = [ orange ]
+&       b1 # sub_nodes = [ a2; cherries ]
+&       c1 # sub_nodes = []
+& cherries # sub_nodes = []
+&   orange # sub_nodes = []
+
+&       a2 # parent == a1
+&       b1 # parent == a1
+&       c1 # parent == a1
+& cherries # parent == b1
+&   orange # parent == a2

Searching nodes. The following function searches all nodes of a tree +for which a certain condition holds: + +

let rec search p t =
+  if p t then
+    t :: search_list p (t # sub_nodes)
+  else
+    search_list p (t # sub_nodes)
+
+and search_list p l =
+  match l with
+    []      -> []
+  | t :: l' -> (search p t) @ (search_list p l')
+;;

For example, if you want to search all elements of a certain +type et, the function search can be +applied as follows: + +

let search_element_type et t =
+  search (fun x -> x # node_type = T_element et) t
+;;

Getting attribute values. Suppose we have the declaration: + +

<!ATTLIST e a CDATA #REQUIRED
+            b CDATA #IMPLIED
+            c CDATA "12345">
+ +In this case, every element e must have an attribute +a, otherwise the parser would indicate an error. If +the O'Caml variable n holds the node of the tree +corresponding to the element, you can get the value of the attribute +a by + +
let value_of_a = n # required_string_attribute "a"
+ +which is more or less an abbreviation for + +
let value_of_a = 
+  match n # attribute "a" with
+    Value s -> s
+  | _       -> assert false
+ +- as the attribute is required, the attribute method always +returns a Value.

In contrast to this, the attribute b can be +omitted. In this case, the method required_string_attribute +works only if the attribute is there, and the method will fail if the attribute +is missing. To get the value, you can apply the method +optional_string_attribute: + +

let value_of_b = n # optional_string_attribute "b"
+ +Now, value_of_b is of type string option, +and None represents the omitted attribute. Alternatively, +you could also use attribute: + +
let value_of_b = 
+  match n # attribute "b" with
+    Value s       -> Some s
+  | Implied_value -> None
+  | _             -> assert false

The attribute c behaves much like +a, because it has always a value. If the attribute is +omitted, the default, here "12345", will be returned instead. Because of this, +you can again use required_string_attribute to get the +value.

The type CDATA is the most general string +type. The types NMTOKEN, ID, +IDREF, ENTITY, and all enumerators and +notations are special forms of string types that restrict the possible +values. From O'Caml, they behave like CDATA, i.e. you can +use the methods required_string_attribute and +optional_string_attribute, too.

In contrast to this, the types NMTOKENS, +IDREFS, and ENTITIES mean lists of +strings. Suppose we have the declaration: + +

<!ATTLIST f d NMTOKENS #REQUIRED
+            e NMTOKENS #IMPLIED>
+ +The type NMTOKENS stands for lists of space-separated +tokens; for example the value "1 abc 23ef" means the list +["1"; "abc"; "23ef"]. (Again, IDREFS +and ENTITIES have more restricted values.) To get the +value of attribute d, one can use + +
let value_of_d = n # required_list_attribute "d"
+ +or + +
let value_of_d = 
+  match n # attribute "d" with
+    Valuelist l -> l
+  | _           -> assert false
+ +As d is required, the attribute cannot be omitted, and +the attribute method always returns a +Valuelist.

For optional attributes like e, apply + +

let value_of_e = n # optional_list_attribute "e"
+ +or + +
let value_of_e = 
+  match n # attribute "e" with
+    Valuelist l   -> l
+  | Implied_value -> []
+  | _             -> assert false
+ +Here, a missing attribute is treated like the empty list.

3.2.7. Iterators

There are also several iterators in Pxp_document; please see +the mli file for details. You can find examples for them in the +"simple_transformation" directory. + +

val find : ?deeply:bool -> 
+           f:('ext node -> bool) -> 'ext node -> 'ext node
+
+val find_all : ?deeply:bool ->
+               f:('ext node -> bool) -> 'ext node -> 'ext node list
+
+val find_element : ?deeply:bool ->
+                   string -> 'ext node -> 'ext node
+
+val find_all_elements : ?deeply:bool ->
+                        string -> 'ext node -> 'ext node list
+
+exception Skip
+val map_tree :  pre:('exta node -> 'extb node) ->
+               ?post:('extb node -> 'extb node) ->
+               'exta node -> 
+                   'extb node
+
+
+val map_tree_sibl : 
+        pre: ('exta node option -> 'exta node -> 'exta node option -> 
+                  'extb node) ->
+       ?post:('extb node option -> 'extb node -> 'extb node option -> 
+                  'extb node) ->
+       'exta node -> 
+           'extb node
+
+val iter_tree : ?pre:('ext node -> unit) ->
+                ?post:('ext node -> unit) ->
+                'ext node -> 
+                    unit
+
+val iter_tree_sibl :
+       ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       ?post:('ext node option -> 'ext node -> 'ext node option -> unit) ->
+       'ext node -> 
+           unit


PrevHomeNext
The objects representing the documentUpThe class type extension
\ No newline at end of file diff --git a/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps b/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps new file mode 100644 index 000000000..3a98c7964 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/ps/markup.ps @@ -0,0 +1,8866 @@ +%!PS-Adobe-2.0 +%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software +%%Pages: 96 +%%PageOrder: Ascend +%%BoundingBox: 0 0 596 842 +%%DocumentFonts: Helvetica-Bold Times-Roman Times-Bold Times-Italic +%%+ Courier Courier-Oblique Helvetica-BoldOblique Courier-Bold +%%DocumentPaperSizes: a4 +%%EndComments +%DVIPSWebPage: (www.radicaleye.com) +%DVIPSCommandLine: dvips -f +%DVIPSParameters: dpi=600, compressed +%DVIPSSource: TeX output 2000.08.30:1757 +%%BeginProcSet: texc.pro +%! +/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S +N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 +mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 +0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ +landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize +mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ +matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round +exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ +statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] +N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin +/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array +/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 +array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N +df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A +definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get +}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} +B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr +1 add N}if}B/id 0 N/rw 0 
N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3 +1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx +0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx +sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{ +rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp +gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B +/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{ +/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{ +A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy +get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse} +ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp +fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17 +{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add +chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{ +1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop} +forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn +/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put +}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ +bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A +mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ +SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ +userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X +1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 +index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N +/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ +/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) +(LaserWriter 16/600)]{A length product length le{A length product exch 0 +exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse 
+end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask +grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} +imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round +exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto +fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p +delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} +B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ +p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S +rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end + +%%EndProcSet +%%BeginProcSet: 8r.enc +% @@psencodingfile@{ +% author = "S. Rahtz, P. MacKay, Alan Jeffrey, B. Horn, K. Berry", +% version = "0.6", +% date = "1 July 1998", +% filename = "8r.enc", +% email = "tex-fonts@@tug.org", +% docstring = "Encoding for TrueType or Type 1 fonts +% to be used with TeX." +% @} +% +% Idea is to have all the characters normally included in Type 1 fonts +% available for typesetting. This is effectively the characters in Adobe +% Standard Encoding + ISO Latin 1 + extra characters from Lucida. +% +% Character code assignments were made as follows: +% +% (1) the Windows ANSI characters are almost all in their Windows ANSI +% positions, because some Windows users cannot easily reencode the +% fonts, and it makes no difference on other systems. The only Windows +% ANSI characters not available are those that make no sense for +% typesetting -- rubout (127 decimal), nobreakspace (160), softhyphen +% (173). quotesingle and grave are moved just because it's such an +% irritation not having them in TeX positions. +% +% (2) Remaining characters are assigned arbitrarily to the lower part +% of the range, avoiding 0, 10 and 13 in case we meet dumb software. 
+% +% (3) Y&Y Lucida Bright includes some extra text characters; in the +% hopes that other PostScript fonts, perhaps created for public +% consumption, will include them, they are included starting at 0x12. +% +% (4) Remaining positions left undefined are for use in (hopefully) +% upward-compatible revisions, if someday more characters are generally +% available. +% +% (5) hyphen appears twice for compatibility with both +% ASCII and Windows. +% +/TeXBase1Encoding [ +% 0x00 (encoded characters from Adobe Standard not in Windows 3.1) + /.notdef /dotaccent /fi /fl + /fraction /hungarumlaut /Lslash /lslash + /ogonek /ring /.notdef + /breve /minus /.notdef +% These are the only two remaining unencoded characters, so may as +% well include them. + /Zcaron /zcaron +% 0x10 + /caron /dotlessi +% (unusual TeX characters available in, e.g., Lucida Bright) + /dotlessj /ff /ffi /ffl + /.notdef /.notdef /.notdef /.notdef + /.notdef /.notdef /.notdef /.notdef + % very contentious; it's so painful not having quoteleft and quoteright + % at 96 and 145 that we move the things normally found there to here. 
+ /grave /quotesingle +% 0x20 (ASCII begins) + /space /exclam /quotedbl /numbersign + /dollar /percent /ampersand /quoteright + /parenleft /parenright /asterisk /plus /comma /hyphen /period /slash +% 0x30 + /zero /one /two /three /four /five /six /seven + /eight /nine /colon /semicolon /less /equal /greater /question +% 0x40 + /at /A /B /C /D /E /F /G /H /I /J /K /L /M /N /O +% 0x50 + /P /Q /R /S /T /U /V /W + /X /Y /Z /bracketleft /backslash /bracketright /asciicircum /underscore +% 0x60 + /quoteleft /a /b /c /d /e /f /g /h /i /j /k /l /m /n /o +% 0x70 + /p /q /r /s /t /u /v /w + /x /y /z /braceleft /bar /braceright /asciitilde + /.notdef % rubout; ASCII ends +% 0x80 + /.notdef /.notdef /quotesinglbase /florin + /quotedblbase /ellipsis /dagger /daggerdbl + /circumflex /perthousand /Scaron /guilsinglleft + /OE /.notdef /.notdef /.notdef +% 0x90 + /.notdef /.notdef /.notdef /quotedblleft + /quotedblright /bullet /endash /emdash + /tilde /trademark /scaron /guilsinglright + /oe /.notdef /.notdef /Ydieresis +% 0xA0 + /.notdef % nobreakspace + /exclamdown /cent /sterling + /currency /yen /brokenbar /section + /dieresis /copyright /ordfeminine /guillemotleft + /logicalnot + /hyphen % Y&Y (also at 45); Windows' softhyphen + /registered + /macron +% 0xD0 + /degree /plusminus /twosuperior /threesuperior + /acute /mu /paragraph /periodcentered + /cedilla /onesuperior /ordmasculine /guillemotright + /onequarter /onehalf /threequarters /questiondown +% 0xC0 + /Agrave /Aacute /Acircumflex /Atilde /Adieresis /Aring /AE /Ccedilla + /Egrave /Eacute /Ecircumflex /Edieresis + /Igrave /Iacute /Icircumflex /Idieresis +% 0xD0 + /Eth /Ntilde /Ograve /Oacute + /Ocircumflex /Otilde /Odieresis /multiply + /Oslash /Ugrave /Uacute /Ucircumflex + /Udieresis /Yacute /Thorn /germandbls +% 0xE0 + /agrave /aacute /acircumflex /atilde + /adieresis /aring /ae /ccedilla + /egrave /eacute /ecircumflex /edieresis + /igrave /iacute /icircumflex /idieresis +% 0xF0 + /eth /ntilde /ograve /oacute + 
/ocircumflex /otilde /odieresis /divide + /oslash /ugrave /uacute /ucircumflex + /udieresis /yacute /thorn /ydieresis +] def + +%%EndProcSet +%%BeginProcSet: texps.pro +%! +TeXDict begin/rf{findfont dup length 1 add dict begin{1 index/FID ne 2 +index/UniqueID ne and{def}{pop pop}ifelse}forall[1 index 0 6 -1 roll +exec 0 exch 5 -1 roll VResolution Resolution div mul neg 0 0]/Metrics +exch def dict begin Encoding{exch dup type/integertype ne{pop pop 1 sub +dup 0 le{pop}{[}ifelse}{FontMatrix 0 get div Metrics 0 get div def} +ifelse}forall Metrics/Metrics currentdict end def[2 index currentdict +end definefont 3 -1 roll makefont/setfont cvx]cvx def}def/ObliqueSlant{ +dup sin S cos div neg}B/SlantFont{4 index mul add}def/ExtendFont{3 -1 +roll mul exch}def/ReEncodeFont{CharStrings rcheck{/Encoding false def +dup[exch{dup CharStrings exch known not{pop/.notdef/Encoding true def} +if}forall Encoding{]exch pop}{cleartomark}ifelse}if/Encoding exch def} +def end + +%%EndProcSet +%%BeginProcSet: special.pro +%! 
+TeXDict begin/SDict 200 dict N SDict begin/@SpecialDefaults{/hs 612 N +/vs 792 N/ho 0 N/vo 0 N/hsc 1 N/vsc 1 N/ang 0 N/CLIP 0 N/rwiSeen false N +/rhiSeen false N/letter{}N/note{}N/a4{}N/legal{}N}B/@scaleunit 100 N +/@hscale{@scaleunit div/hsc X}B/@vscale{@scaleunit div/vsc X}B/@hsize{ +/hs X/CLIP 1 N}B/@vsize{/vs X/CLIP 1 N}B/@clip{/CLIP 2 N}B/@hoffset{/ho +X}B/@voffset{/vo X}B/@angle{/ang X}B/@rwi{10 div/rwi X/rwiSeen true N}B +/@rhi{10 div/rhi X/rhiSeen true N}B/@llx{/llx X}B/@lly{/lly X}B/@urx{ +/urx X}B/@ury{/ury X}B/magscale true def end/@MacSetUp{userdict/md known +{userdict/md get type/dicttype eq{userdict begin md length 10 add md +maxlength ge{/md md dup length 20 add dict copy def}if end md begin +/letter{}N/note{}N/legal{}N/od{txpose 1 0 mtx defaultmatrix dtransform S +atan/pa X newpath clippath mark{transform{itransform moveto}}{transform{ +itransform lineto}}{6 -2 roll transform 6 -2 roll transform 6 -2 roll +transform{itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll +curveto}}{{closepath}}pathforall newpath counttomark array astore/gc xdf +pop ct 39 0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack} +if}N/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1 +-1 scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3 +get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip +yflip not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub +neg 0 TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{ +noflips{TR pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop +90 rotate 1 -1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get +neg sub neg TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr +1 get neg sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr +2 get ppr 0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4 +-1 roll add 2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S +TR}if}N/cp{pop pop showpage pm restore}N 
end}if}if}N/normalscale{ +Resolution 72 div VResolution 72 div neg scale magscale{DVImag dup scale +}if 0 setgray}N/psfts{S 65781.76 div N}N/startTexFig{/psf$SavedState +save N userdict maxlength dict begin/magscale true def normalscale +currentpoint TR/psf$ury psfts/psf$urx psfts/psf$lly psfts/psf$llx psfts +/psf$y psfts/psf$x psfts currentpoint/psf$cy X/psf$cx X/psf$sx psf$x +psf$urx psf$llx sub div N/psf$sy psf$y psf$ury psf$lly sub div N psf$sx +psf$sy scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub +TR/showpage{}N/erasepage{}N/copypage{}N/p 3 def @MacSetUp}N/doclip{ +psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2 +roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath +moveto}N/endTexFig{end psf$SavedState restore}N/@beginspecial{SDict +begin/SpecialSave save N gsave normalscale currentpoint TR +@SpecialDefaults count/ocount X/dcount countdictstack N}N/@setspecial{ +CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto +closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx +sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR +}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse +CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury +lineto closepath clip}if/showpage{}N/erasepage{}N/copypage{}N newpath}N +/@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{end} +repeat grestore SpecialSave restore end}N/@defspecial{SDict begin}N +/@fedspecial{end}B/li{lineto}B/rl{rlineto}B/rc{rcurveto}B/np{/SaveX +currentpoint/SaveY X N 1 setlinecap newpath}N/st{stroke SaveX SaveY +moveto}N/fil{fill SaveX SaveY moveto}N/ellipse{/endangle X/startangle X +/yrad X/xrad X/savematrix matrix currentmatrix N TR xrad yrad scale 0 0 +1 startangle endangle arc savematrix setmatrix}N end + +%%EndProcSet +%%BeginProcSet: color.pro +%! 
+TeXDict begin/setcmykcolor where{pop}{/setcmykcolor{dup 10 eq{pop +setrgbcolor}{1 sub 4 1 roll 3{3 index add neg dup 0 lt{pop 0}if 3 1 roll +}repeat setrgbcolor pop}ifelse}B}ifelse/TeXcolorcmyk{setcmykcolor}def +/TeXcolorrgb{setrgbcolor}def/TeXcolorgrey{setgray}def/TeXcolorgray{ +setgray}def/TeXcolorhsb{sethsbcolor}def/currentcmykcolor where{pop}{ +/currentcmykcolor{currentrgbcolor 10}B}ifelse/DC{exch dup userdict exch +known{pop pop}{X}ifelse}B/GreenYellow{0.15 0 0.69 0 setcmykcolor}DC +/Yellow{0 0 1 0 setcmykcolor}DC/Goldenrod{0 0.10 0.84 0 setcmykcolor}DC +/Dandelion{0 0.29 0.84 0 setcmykcolor}DC/Apricot{0 0.32 0.52 0 +setcmykcolor}DC/Peach{0 0.50 0.70 0 setcmykcolor}DC/Melon{0 0.46 0.50 0 +setcmykcolor}DC/YellowOrange{0 0.42 1 0 setcmykcolor}DC/Orange{0 0.61 +0.87 0 setcmykcolor}DC/BurntOrange{0 0.51 1 0 setcmykcolor}DC +/Bittersweet{0 0.75 1 0.24 setcmykcolor}DC/RedOrange{0 0.77 0.87 0 +setcmykcolor}DC/Mahogany{0 0.85 0.87 0.35 setcmykcolor}DC/Maroon{0 0.87 +0.68 0.32 setcmykcolor}DC/BrickRed{0 0.89 0.94 0.28 setcmykcolor}DC/Red{ +0 1 1 0 setcmykcolor}DC/OrangeRed{0 1 0.50 0 setcmykcolor}DC/RubineRed{ +0 1 0.13 0 setcmykcolor}DC/WildStrawberry{0 0.96 0.39 0 setcmykcolor}DC +/Salmon{0 0.53 0.38 0 setcmykcolor}DC/CarnationPink{0 0.63 0 0 +setcmykcolor}DC/Magenta{0 1 0 0 setcmykcolor}DC/VioletRed{0 0.81 0 0 +setcmykcolor}DC/Rhodamine{0 0.82 0 0 setcmykcolor}DC/Mulberry{0.34 0.90 +0 0.02 setcmykcolor}DC/RedViolet{0.07 0.90 0 0.34 setcmykcolor}DC +/Fuchsia{0.47 0.91 0 0.08 setcmykcolor}DC/Lavender{0 0.48 0 0 +setcmykcolor}DC/Thistle{0.12 0.59 0 0 setcmykcolor}DC/Orchid{0.32 0.64 0 +0 setcmykcolor}DC/DarkOrchid{0.40 0.80 0.20 0 setcmykcolor}DC/Purple{ +0.45 0.86 0 0 setcmykcolor}DC/Plum{0.50 1 0 0 setcmykcolor}DC/Violet{ +0.79 0.88 0 0 setcmykcolor}DC/RoyalPurple{0.75 0.90 0 0 setcmykcolor}DC +/BlueViolet{0.86 0.91 0 0.04 setcmykcolor}DC/Periwinkle{0.57 0.55 0 0 +setcmykcolor}DC/CadetBlue{0.62 0.57 0.23 0 setcmykcolor}DC +/CornflowerBlue{0.65 0.13 0 0 
setcmykcolor}DC/MidnightBlue{0.98 0.13 0 +0.43 setcmykcolor}DC/NavyBlue{0.94 0.54 0 0 setcmykcolor}DC/RoyalBlue{1 +0.50 0 0 setcmykcolor}DC/Blue{1 1 0 0 setcmykcolor}DC/Cerulean{0.94 0.11 +0 0 setcmykcolor}DC/Cyan{1 0 0 0 setcmykcolor}DC/ProcessBlue{0.96 0 0 0 +setcmykcolor}DC/SkyBlue{0.62 0 0.12 0 setcmykcolor}DC/Turquoise{0.85 0 +0.20 0 setcmykcolor}DC/TealBlue{0.86 0 0.34 0.02 setcmykcolor}DC +/Aquamarine{0.82 0 0.30 0 setcmykcolor}DC/BlueGreen{0.85 0 0.33 0 +setcmykcolor}DC/Emerald{1 0 0.50 0 setcmykcolor}DC/JungleGreen{0.99 0 +0.52 0 setcmykcolor}DC/SeaGreen{0.69 0 0.50 0 setcmykcolor}DC/Green{1 0 +1 0 setcmykcolor}DC/ForestGreen{0.91 0 0.88 0.12 setcmykcolor}DC +/PineGreen{0.92 0 0.59 0.25 setcmykcolor}DC/LimeGreen{0.50 0 1 0 +setcmykcolor}DC/YellowGreen{0.44 0 0.74 0 setcmykcolor}DC/SpringGreen{ +0.26 0 0.76 0 setcmykcolor}DC/OliveGreen{0.64 0 0.95 0.40 setcmykcolor} +DC/RawSienna{0 0.72 1 0.45 setcmykcolor}DC/Sepia{0 0.83 1 0.70 +setcmykcolor}DC/Brown{0 0.81 1 0.60 setcmykcolor}DC/Tan{0.14 0.42 0.56 0 +setcmykcolor}DC/Gray{0 0 0 0.50 setcmykcolor}DC/Black{0 0 0 1 +setcmykcolor}DC/White{0 0 0 0 setcmykcolor}DC end + +%%EndProcSet +TeXDict begin 39158280 55380996 1000 600 600 () @start +/Fa 106[21 149[{TeXBase1Encoding ReEncodeFont}1 59.7758 +/Times-Roman rf /Fb 135[77 2[77 77 77 3[77 77 77 3[77 +3[77 77 77 99[{TeXBase1Encoding ReEncodeFont}11 129.116 +/Courier-Bold rf /Fc 134[65 65 2[65 65 65 65 1[65 65 +65 65 65 2[65 65 65 65 65 65 65 65 65 1[65 36[65 6[65 +65 65 49[{TeXBase1Encoding ReEncodeFont}25 107.597 /Courier-Bold +rf /Fd 141[56 4[128 7[80 88 2[80 97[{TeXBase1Encoding ReEncodeFont}5 +143.462 /Helvetica-BoldOblique rf /Fe 147[21 4[37 1[33 +3[37 23[25 14[25 58[{TeXBase1Encoding ReEncodeFont}6 +74.7198 /Times-Italic rf /Ff 204[25 25 25 49[{ +TeXBase1Encoding ReEncodeFont}3 49.8132 /Times-Roman +rf +%DVIPSBitmapFont: Fg cmmi8 8 2 +/Fg 2 63 df60 +D<12E012F812FEEA3F80EA0FE0EA03F8EA00FEEB3F80EB0FE0EB03F8EB00FC143FEC0FC0 
+EC07F0EC01FCEC007FED1FC0ED07F0ED01FCED007FEE1FC01607161FEE7F00ED01FCED07 +F0ED1FC0037FC7FCEC01FCEC07F0EC0FC0023FC8FC14FCEB03F8EB0FE0EB3F8001FEC9FC +EA03F8EA0FE0EA3F8000FECAFC12F812E02A2B7AA537>62 D E +%EndDVIPSBitmapFont +/Fh 131[40 1[40 40 40 40 40 40 40 40 40 40 40 40 40 40 +40 40 1[40 40 40 1[40 40 40 40 40 1[40 5[40 3[40 40 40 +40 40 40 40 40 40 40 40 1[40 40 40 1[40 40 40 40 40 1[40 +40 40 40 40 40 1[40 4[40 1[40 1[40 40 40 40 40 40 40 +40 40 40 40 1[40 40 40 33[{TeXBase1Encoding ReEncodeFont}69 +67.2479 /Courier rf /Fi 105[37 28[37 37 54 37 37 21 29 +25 37 37 37 37 58 21 37 1[21 37 37 25 33 37 33 37 33 +7[54 54 3[46 5[54 66 46 2[25 2[42 2[50 50 54 5[21 21 +11[19 1[19 2[25 25 25 4[30 31[42 2[{TeXBase1Encoding ReEncodeFont}45 +74.7198 /Times-Roman rf /Fj 135[55 7[61 2[89 28 6[55 +3[55 27[66 69[{TeXBase1Encoding ReEncodeFont}7 99.6264 +/Helvetica-Bold rf /Fk 145[27 2[27 57[27 49[{ +TeXBase1Encoding ReEncodeFont}3 44.8318 /Courier-Oblique +rf /Fl 135[50 3[50 50 3[50 50 3[50 50 3[50 1[50 50 2[50 +95[{TeXBase1Encoding ReEncodeFont}11 83.022 /Courier-Oblique +rf +%DVIPSBitmapFont: Fm cmmi10 10 2 +/Fm 2 63 df60 +D<126012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F +C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE +01FF9338007F80EF1FC0A2EF7F80933801FF00EE07FCEE1FF0EE7FC04B48C7FCED07FCED +1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07FCEB1FF0EB7FC04848CAFCEA +07FCEA3FF0EA7FC048CBFC12FC1270323279AD41>62 D E +%EndDVIPSBitmapFont +/Fn 134[45 45 1[45 45 45 45 45 1[45 45 45 45 45 1[45 +45 45 45 45 45 45 45 45 45 1[45 5[45 2[45 8[45 5[45 2[45 +45 1[45 19[45 45 44[{TeXBase1Encoding ReEncodeFont}32 +74.7198 /Courier-Oblique rf +%DVIPSBitmapFont: Fo cmmi9 9 2 +/Fo 2 63 df<171C177EEE01FEEE07FCEE1FF0EE7FC0923801FF00ED07FCED1FF0ED7FC0 +4A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07FCEA1FF0 +EA7FC048CAFCA2EA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F 
+C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE +01FEEE007E171C2F2E7AA93C>60 D<127012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007F +C0EB1FF0EB07FCEB01FF9038007FC0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED +01FF9238007FC0EE1FF0EE07FCEE01FEA2EE07FCEE1FF0EE7FC0923801FF00ED07FCED1F +F0ED7FC04A48C7FCEC07FCEC1FF0EC7FC04948C8FCEB07FCEB1FF0EB7FC04848C9FCEA07 +FCEA1FF0EA7FC048CAFC12FC12702F2E7AA93C>62 D E +%EndDVIPSBitmapFont +/Fp 134[66 66 93 66 73 40 66 47 1[73 73 73 106 33 2[33 +73 73 40 66 73 66 73 66 8[80 113 80 86 73 80 86 1[80 +1[86 100 73 2[33 86 1[73 80 86 86 1[86 1[73 5[66 66 66 +66 66 66 66 66 66 66 1[33 40 33 2[40 40 5[57 31[73 2[{ +TeXBase1Encoding ReEncodeFont}58 119.552 /Helvetica-Bold +rf /Fq 129[45 45 45 45 45 45 45 45 45 45 45 45 45 45 +45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 +45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 +45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 45 45 +45 45 45 45 1[45 45 45 45 45 45 45 45 45 45 45 45 45 +45 45 45 45 1[45 45 45 33[{TeXBase1Encoding ReEncodeFont}90 +74.7198 /Courier rf /Fr 134[37 37 55 37 42 23 32 32 1[42 +42 42 60 23 37 23 23 42 42 23 37 42 37 42 42 1[42 6[51 +69 1[60 46 42 2[51 1[55 69 46 2[28 3[51 60 55 1[51 1[42 +4[28 42 42 42 42 42 42 42 42 42 42 1[21 28 21 2[28 28 +6[28 30[42 2[{TeXBase1Encoding ReEncodeFont}58 83.022 +/Times-Italic rf /Fs 138[105 57 96 67 1[105 105 105 153 +48 1[48 48 105 105 57 96 105 96 105 96 8[115 163 1[124 +105 3[115 2[143 105 5[105 2[124 3[105 10[96 96 96 96 +2[48 43[105 2[{TeXBase1Encoding ReEncodeFont}35 172.154 +/Helvetica-Bold rf /Ft 106[23 29 29 25[33 33 48 33 33 +18 26 22 1[33 33 33 52 18 33 18 18 33 33 22 29 33 29 +33 29 8[48 3[41 37 2[37 6[22 1[48 12[18 10[18 17 1[17 +2[22 22 5[27 31[37 2[{TeXBase1Encoding ReEncodeFont}41 +66.4176 /Times-Roman rf /Fu 134[42 42 60 42 46 28 32 +37 1[46 42 46 69 23 46 1[23 46 42 28 37 46 37 46 42 9[83 +60 60 55 46 60 3[60 78 55 2[32 65 65 51 55 60 60 55 60 +1[42 6[42 1[42 42 42 42 42 42 2[21 28 21 4[28 39[{ 
+TeXBase1Encoding ReEncodeFont}53 83.022 /Times-Bold rf +/Fv 27[37 58[63 42[45 40 1[40 37 42 42 60 42 42 23 32 +28 42 42 42 42 65 23 42 23 23 42 42 28 37 42 37 42 37 +28 42 1[28 23 28 1[60 60 78 60 60 51 46 55 60 46 60 60 +74 51 60 1[28 60 60 46 51 60 55 55 60 1[37 47 47 47 23 +23 42 42 42 42 42 42 42 42 42 42 23 21 28 21 2[28 28 +28 65 69 1[42 34 28 29[46 46 2[{TeXBase1Encoding ReEncodeFont}90 +83.022 /Times-Roman rf /Fw 136[65 1[51 1[46 32 2[51 51 +1[23 2[23 51 51 1[46 51 2[46 8[55 3[51 3[55 11[60 9[28 +18[23 39[{TeXBase1Encoding ReEncodeFont}19 83.022 /Helvetica-Bold +rf /Fx 134[80 80 112 80 88 48 80 56 1[88 88 88 128 40 +80 1[40 88 88 48 80 88 80 88 80 8[96 1[96 104 88 96 104 +2[112 104 120 88 2[40 104 112 1[96 104 104 1[104 6[48 +4[80 80 80 80 80 2[40 48 45[{TeXBase1Encoding ReEncodeFont}48 +143.462 /Helvetica-Bold rf /Fy 138[126 1[115 80 8[57 +126 126 1[115 126 11[138 2[149 126 3[138 6[57 26[57 6[57 +39[{TeXBase1Encoding ReEncodeFont}15 206.584 /Helvetica-Bold +rf end +%%EndProlog +%%BeginSetup +%%Feature: *Resolution 600dpi +TeXDict begin +%%BeginPaperSize: a4 +a4 +%%EndPaperSize + +%%EndSetup +%%Page: 1 1 +1 0 bop Black Black 890 647 a Fy(The)58 b(PXP)f(user')-12 +b(s)58 b(guide)1384 2594 y Fx(Ger)m(d)39 b(Stolpmann)p +Black Black eop +%%Page: 2 2 +2 1 bop Black Black -2 579 a Fw(The)22 b(PXP)j(user')-5 +b(s)23 b(guide)-2 687 y Fv(by)d(Gerd)f(Stolpmann)-2 903 +y(Cop)o(yright)f(\251)j(1999,)e(2000)g(by)g(Gerd)h(Stolpmann)-2 +1135 y(PXP)h(is)g(a)g(v)n(alidating)d(parser)i(for)f(XML-1.0)g(which)h +(has)g(been)g(written)g(entirely)f(in)h(Objecti)n(v)o(e)g(Caml.)-2 +1285 y Fw(Do)o(wnload)h(PXP:)j Fv(The)c(free)g(PXP)h(library)e(can)h +(be)g(do)n(wnloaded)d(at)k(http://www)-5 b(.ocaml-programming)o(.de)o +(/pack)o(age)o(s/.)15 b(This)-2 1393 y(user')-5 b(s)20 +b(guide)f(is)j(included.)c(Ne)n(west)j(releases)f(of)g(PXP)h(will)g(be) +f(announced)e(in)i(The)g(OCaml)g(Link)g(Database)-2 1500 +y(\(http://www)-5 b(.npc.de/ocaml/linkdb)o(/\).)-2 1899 +y Fu(License)-2 
2090 y Ft(This)16 b(document,)j(and)e(the)h(described)h +(softw)o(are,)f("PXP",)e(are)i(cop)o(yright)i(by)d(Gerd)g(Stolpmann.)-2 +2198 y(Permission)h(is)e(hereby)j(granted,)f(free)g(of)f(char)o(ge,)h +(to)f(an)o(y)h(person)f(obtaining)j(a)d(cop)o(y)h(of)f(this)h(document) +g(and)g(the)f("PXP")g(softw)o(are)i(\(the)f("Softw)o(are"\),)g(to)f +(deal)i(in)-2 2306 y(the)f(Softw)o(are)g(without)h(restriction,)g +(including)h(without)e(limitation)i(the)e(rights)g(to)f(use,)g(cop)o(y) +l(,)g(modify)l(,)g(mer)o(ge,)g(publish,)h(distrib)o(ute,)h(sublicense,) +g(and/or)f(sell)-2 2414 y(copies)g(of)f(the)h(Softw)o(are,)g(and)g(to)f +(permit)h(persons)f(to)h(whom)e(the)i(Softw)o(are)h(is)e(furnished)h +(to)f(do)g(so,)g(subject)h(to)g(the)f(follo)n(wing)j(conditions:)-2 +2522 y(The)d(abo)o(v)o(e)h(cop)o(yright)h(notice)g(and)f(this)f +(permission)h(notice)h(shall)f(be)g(included)h(in)e(all)h(copies)h(or)e +(substantial)i(portions)g(of)e(the)g(Softw)o(are.)-2 +2630 y(The)g(Softw)o(are)h(is)f(pro)o(vided)i(\223as)e(is\224,)g +(without)i(w)o(arranty)g(of)e(an)o(y)g(kind,)h(e)o(xpress)f(or)g +(implied,)i(including)g(b)o(ut)e(not)h(limited)h(to)e(the)h(w)o +(arranties)h(of)e(merchantability)l(,)-2 2737 y(\002tness)g(for)g(a)g +(particular)j(purpose)e(and)g(noninfringement.)i(In)d(no)g(e)n(v)o(ent) +h(shall)h(Gerd)e(Stolpmann)h(be)g(liable)h(for)e(an)o(y)g(claim,)h +(damages)g(or)f(other)h(liability)l(,)i(whether)-2 2845 +y(in)d(an)g(action)i(of)e(contract,)i(tort)f(or)f(otherwise,)i(arising) +f(from,)e(out)i(of)f(or)g(in)g(connection)j(with)e(the)f(Softw)o(are)i +(or)e(the)h(use)f(or)g(other)h(dealings)h(in)e(the)h(softw)o(are.)p +Black Black eop +%%Page: 3 3 +3 2 bop Black Black -2 621 a Fs(T)-14 b(ab)n(le)48 b(of)g(Contents)396 +815 y Fu(I.)21 b(User')m(s)g(guide)p Black 4 w(.)p Black +Black -1 w(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 
Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p 
+Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black 4 w(6)596 +943 y Fv(1.)f(What)g(is)h(XML?)p Black 4 w(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black 
Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black 4 w(7)795 1051 +y(1.1.)e(Introduction)p Black 14 w(.)p Black Black -1 +w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black 
+-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black 4 w(7)994 1159 +y(1.1.1.)g(The)g("hello)h(w)o(orld")g(e)o(xample)p Black +13 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black 
Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black 4 w(7)994 1267 y(1.1.2.)f(XML)h(parsers)g(and)f +(processors)p Black 3 w(.)p Black Black -2 w(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black 4 w(9)994 1375 y(1.1.3.)g(Discussion)p +Black 9 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black 
Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +4 w(9)795 1483 y(1.2.)g(Highlights)g(of)h(XML)p Black +10 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black 
Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(11)994 +1591 y(1.2.1.)f(The)g(DTD)i(and)e(the)i(instance)p Black +15 w(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black 
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(11)994 1699 y(1.2.2.)e(Reserv)o(ed)g(characters)p +Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 
Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(12)994 1807 y(1.2.3.)g(Elements)g(and)h +(ELEMENT)f(declarations)p Black 7 w(.)p Black Black -2 +w(.)p Black Black(.)p Black Black(.)p Black Black -1 +w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(13)994 +1915 
y(1.2.4.)g(Attrib)n(ute)g(lists)j(and)e(A)-9 b(TTLIST)19 +b(declarations)p Black 6 w(.)p Black Black -2 w(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(15)994 2023 y(1.2.5.)g(P)o(arsed)g(entities)p +Black 18 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 
Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(16)994 2131 y(1.2.6.)g(Notations)g(and)h +(unparsed)e(entities)p Black 14 w(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black 
Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(19)795 2238 y(1.3.)h(A)i(complete)e(e)o(xample:)g +(The)h Fr(r)m(eadme)f Fv(DTD)p Black 3 w(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(20)596 2346 y(2.)h(Using)g(PXP)p Black +6 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black 
Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black 
+Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(24)795 2454 y(2.1.)f(V)-9 b(alidation)p +Black 3 w(.)p Black Black -2 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black 
Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795 +2562 y(2.2.)19 b(Ho)n(w)h(to)g(parse)g(a)h(document)d(from)h(an)h +(application)p Black 10 w(.)p Black Black -2 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(24)795 +2670 y(2.3.)f(Class-based)h(processing)f(of)h(the)g(node)g(tree)p +Black 8 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black 
Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(29)795 +2778 y(2.4.)f(Example:)g(An)h(HTML)g(back)o(end)f(for)g(the)i +Fr(r)m(eadme)e Fv(DTD)p Black 3 w(.)p Black Black -1 +w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(33)994 2886 y(2.4.1.)g(Header)p +Black 9 w(.)p 
Black Black -2 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(33)994 2994 
y(2.4.2.)g(T)-7 b(ype)19 +b(declarations)p Black 14 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(33)994 3102 y(2.4.3.)g(Class)i Fq(store)p Black +11 w Fv(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black 
Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(34)994 3210 y(2.4.4.)e(Function)g +Fq(escape_html)p Black Fv(.)p Black Black -2 w(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black 
Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(35)994 3318 y(2.4.5.)g(V)-5 b(irtual)20 b(class)h +Fq(shared)p Black 4 w Fv(.)p Black Black -2 w(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 
Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(35)994 3426 y(2.4.6.)e(Class)i +Fq(only_data)p Black 17 w Fv(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black 
Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(36)994 +3534 y(2.4.7.)e(Class)i Fq(readme)p Black 8 w Fv(.)p +Black Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 
w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(36)994 3642 y(2.4.8.)e(Classes)i +Fq(section)p Fv(,)f Fq(sect1)p Fv(,)f Fq(sect2)p Fv(,)h(and)g +Fq(sect3)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(39)994 3749 y(2.4.9.)f(Classes)i +Fq(map_tag)p Fv(,)f Fq(p)p Fv(,)g Fq(em)p Fv(,)g Fq(ul)p +Fv(,)g Fq(li)p Black 16 w Fv(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p 
+Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(39)994 +3857 y(2.4.10.)e(Class)k Fq(br)p Black Fv(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p 
Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(40)994 3965 y(2.4.11.)c(Class)k +Fq(code)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 
Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(40)994 4073 y(2.4.12.)c(Class)k +Fq(a)p Black 4 w Fv(.)p Black Black -1 w(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p 
Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(41)994 +4181 y(2.4.13.)c(Class)k Fq(footnote)p Black 1 w Fv(.)p +Black Black -2 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black 
+Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(42)994 4289 +y(2.4.14.)c(The)i(speci\002cation)f(of)h(the)g(document)f(model)p +Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(43)596 4397 y(3.)h(The)f(objects)h +(representing)e(the)j(document)p Black 4 w(.)p Black +Black -3 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black 
Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(46)795 4505 y(3.1.)e(The)h Fq(document)f Fv(class)p +Black 7 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 
Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(46)795 4613 y(3.2.)g(The)h(class)h(type)f +Fq(node)p Black 2 w Fv(.)p Black Black -2 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black 
Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(47)994 4721 y(3.2.1.)f(The)g(structure)h(of)g(document)e(trees)p +Black 3 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 
w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(49)994 +4829 y(3.2.2.)h(The)g(methods)h(of)f(the)i(class)g(type)f +Fq(node)p Black 13 w Fv(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(52)p Black 3842 +5278 a Fr(3)p Black eop +%%Page: 4 4 +4 3 bop Black Black 994 579 a Fv(3.2.3.)19 b(The)g(class)j +Fq(element_impl)p Black 2 w Fv(.)p Black Black -3 w(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black 
Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(56)994 687 y(3.2.4.)d(The)g(class)j Fq(data_impl)p +Black 12 w Fv(.)p Black Black -2 w(.)p Black Black -1 +w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p 
Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(57)994 +795 y(3.2.5.)d(The)g(type)h Fq(spec)p Black 5 w Fv(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 
Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(58)994 903 y(3.2.6.)f(Examples)p Black +5 w(.)p Black Black -3 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p 
Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(60)994 +1011 y(3.2.7.)g(Iterators)p Black 12 w(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black 
Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(64)795 1119 y(3.3.)g(The)h(class)h(type)f Fq(extension)p +Black 6 w Fv(.)p Black Black -2 w(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p 
Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(65)994 1226 y(3.3.1.)f(Ho)n(w)h(to)g(de\002ne) +g(an)g(e)o(xtension)f(class)p Black 13 w(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(66)994 1334 +y(3.3.2.)g(Ho)n(w)h(to)g(bind)f(e)o(xtension)g(classes)i(to)g(element)e +(types)p Black 10 w(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 
Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(68)795 1442 y(3.4.)g(Details)i(of)f(the)g(mapping)e(from)i(XML)g +(te)o(xt)g(to)g(the)g(tree)h(representation)p Black 13 +w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(69)994 +1550 y(3.4.1.)e(The)g(representation)g(of)g(character)n(-free)f +(elements)p Black 9 w(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p 
Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(69)994 1658 y(3.4.2.)h(The)g(representation)g(of)g(character)g +(data)p Black 10 w(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(70)994 1766 +y(3.4.3.)g(The)g(representation)g(of)g(entities)i(within)f(documents)p +Black 12 w(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 
Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(70)994 1874 y(3.4.4.)f(The)g(representation)g +(of)g(attrib)n(utes)p Black 20 w(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p 
Black Black +-1 w(.)p Black 4 w(71)994 1982 y(3.4.5.)g(The)g(representation)g(of)g +(processing)g(instructions)p Black(.)p Black Black -1 +w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(71)994 2090 y(3.4.6.)g(The)g +(representation)g(of)g(comments)p Black 7 w(.)p Black +Black -1 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p 
Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(71)994 2198 y(3.4.7.)g(The)g(attrib)n(utes)i +Fq(xml:lang)e Fv(and)h Fq(xml:space)p Black 10 w Fv(.)p +Black Black -2 w(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(72)994 2306 y(3.4.8.)f(And)g(what)h(about)g(namespaces?)p +Black 12 w(.)p Black Black -2 w(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 
Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(72)596 +2414 y(4.)g(Con\002guring)e(and)h(calling)h(the)g(parser)p +Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p 
Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(73)795 2522 y(4.1.)f(Ov)o(ervie)n(w)p +Black 19 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p 
+Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(73)795 +2630 y(4.2.)g(Resolv)o(ers)h(and)g(sources)p Black 2 +w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p 
Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(75)994 2737 +y(4.2.1.)f(Using)h(the)g(b)n(uilt-in)f(resolv)o(ers)h(\(called)f +(sources\))p Black 5 w(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(75)994 2845 y(4.2.2.)g(The)g(resolv)o(er)g(API)p +Black 11 w(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black 
Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(76)994 2953 y(4.2.3.)g(Prede\002ned)f(resolv)o(er)h +(components)p Black 13 w(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p 
Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black 4 w(78)795 3061 +y(4.3.)g(The)h(DTD)g(classes)p Black 1 w(.)p Black Black +1 w(.)p Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black 
Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(81)795 +3169 y(4.4.)f(In)m(v)n(oking)f(the)i(parser)p Black 14 +w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black 
Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(89)994 +3277 y(4.4.1.)f(Def)o(aults)p Black 10 w(.)p Black Black +-1 w(.)p Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black 
Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(89)994 3385 y(4.4.2.)g(P)o(arsing)g(functions)p +Black 4 w(.)p Black Black -3 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p 
+Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(90)994 3493 y(4.4.3.)g(Con\002guration)f(options)p +Black 19 w(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p 
Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(91)994 3601 y(4.4.4.)h(Which)h +(con\002guration)d(should)i(I)i(use?)p Black 18 w(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(93)795 3709 y(4.5.)e(Updates)p Black 10 w(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 
w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(95)p Black 3842 5278 a +Fr(4)p Black eop +%%Page: 5 5 +5 4 bop Black Black -2 621 a Fs(List)48 b(of)g(Figures)396 +815 y Fv(3-1.)19 b(A)i(tree)f(with)h(element)e(nodes,)h(data)g(nodes,)f +(and)g(attrib)n(utes)p Black 18 w(.)p Black Black(.)p 
+Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black 4 w(49)396 923 y(3-2.)g(Nodes)h(are)g(doubly)f(link)o +(ed)g(trees)p Black 15 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p 
Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black 4 w(50)396 +1031 y(3-3.)g(A)i(node)e(can)h(only)g(be)g(added)f(if)h(it)h(is)g(a)g +(root)p Black 5 w(.)p Black Black -1 w(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p 
Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black 4 w(51)396 1139 y(3-4.)e(A)i(deleted)f(node)f +(becomes)g(the)h(root)g(of)g(the)g(subtree)p Black 3 +w(.)p Black Black -1 w(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black 
Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +4 w(51)396 1247 y(3-5.)f(The)h(clone)g(of)g(a)g(subtree)p +Black 18 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black 
+Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black 4 w(52)396 1355 y(3-6.)f(The)h(structure)g +(of)f(nodes)h(and)g(e)o(xtensions)p Black 18 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black -1 w(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black Black(.)p Black Black(.)p Black Black(.)p Black 
+Black -1 w(.)p Black Black(.)p Black Black(.)p Black +Black(.)p Black Black -1 w(.)p Black Black(.)p Black +Black(.)p Black Black(.)p Black Black -1 w(.)p Black +Black(.)p Black Black(.)p Black Black(.)p Black Black +-1 w(.)p Black Black(.)p Black Black(.)p Black Black(.)p +Black Black -1 w(.)p Black Black(.)p Black Black(.)p +Black Black(.)p Black Black(.)p Black Black -1 w(.)p +Black 4 w(65)p Black 3842 5278 a Fr(5)p Black eop +%%Page: 6 6 +6 5 bop Black Black 1241 647 a Fy(I.)58 b(User')-12 b(s)57 +b(guide)p Black Black eop +%%Page: 7 7 +7 6 bop Black Black -2 621 a Fs(Chapter)48 b(1.)f(What)h(is)f(XML?)-2 +1055 y Fx(1.1.)39 b(Intr)m(oduction)396 1235 y Fv(XML)20 +b(\(short)g(for)f Fr(Extensible)h(Markup)g(Langua)o(g)o(e)p +Fv(\))e(generalizes)h(the)h(idea)g(that)g(te)o(xt)g(documents)f(are)h +(typically)396 1343 y(structured)f(in)h(sections,)g(sub-sections,)f +(paragraphs,)f(and)i(so)g(on.)g(The)g(format)f(of)h(the)g(document)e +(is)j(not)f(\002x)o(ed)g(\(as,)396 1451 y(for)g(e)o(xample,)e(in)j +(HTML\),)e(b)n(ut)h(can)g(be)g(declared)f(by)h(a)h(so-called)e(DTD)i +(\(document)c(type)j(de\002nition\).)f(The)g(DTD)396 +1559 y(describes)h(only)f(the)i(rules)f(ho)n(w)f(the)i(document)d(can)i +(be)g(structured,)e(b)n(ut)j(not)e(ho)n(w)h(the)g(document)e(can)i(be) +396 1667 y(processed.)f(F)o(or)h(e)o(xample,)e(if)j(you)e(w)o(ant)i(to) +f(publish)f(a)i(book)e(that)h(uses)h(XML)f(markup,)e(you)h(will)i(need) +f(a)g(processor)396 1775 y(that)h(con)m(v)o(erts)d(the)i(XML)g(\002le)h +(into)f(a)h(printable)e(format)g(such)h(as)h(Postscript.)f(On)g(the)g +(one)g(hand,)f(the)h(structure)f(of)396 1883 y(XML)h(documents)f(is)i +(con\002gurable;)d(on)i(the)g(other)f(hand,)g(there)h(is)h(no)f(longer) +f(a)h(canonical)f(interpretation)f(of)i(the)396 1991 +y(elements)g(of)g(the)g(document;)f(for)g(e)o(xample)g(one)h(XML)g(DTD) +g(might)g(w)o(ant)g(that)g(paragraphes)e(are)i(delimited)g(by)396 +2099 y Fq(para)g Fv(tags,)h(and)e(another)g(DTD)h(e)o(xpects)g +Fq(p)g 
Fv(tags)h(for)e(the)i(same)f(purpose.)e(As)j(a)g(result,)f(for)g +(e)n(v)o(ery)e(DTD)j(a)f(ne)n(w)396 2206 y(processor)f(is)i(required.) +396 2356 y(Although)e(XML)h(can)g(be)g(used)g(to)g(e)o(xpress)g +(structured)f(te)o(xt)h(documents)e(it)j(is)g(not)f(limited)g(to)g +(this)h(kind)e(of)396 2464 y(application.)g(F)o(or)h(e)o(xample,)e(XML) +i(can)g(also)h(be)f(used)g(to)g(e)o(xchange)e(structured)h(data)h(o)o +(v)o(er)f(a)h(netw)o(ork,)f(or)h(to)396 2572 y(simply)g(store)g +(structured)f(data)h(in)g(\002les.)h(Note)f(that)h(XML)f(documents)e +(cannot)i(contain)f(arbitrary)f(binary)h(data)396 2680 +y(because)g(some)g(characters)g(are)g(forbidden;)e(for)i(some)g +(applications)g(you)f(need)h(to)h(encode)e(binary)g(data)h(as)h(te)o +(xt)g(\(e.g.)396 2788 y(the)g(base)h(64)f(encoding\).)-2 +3116 y Fp(1.1.1.)35 b(The)f("hello)g(w)n(orld")e(e)n(xample)396 +3283 y Fv(The)20 b(follo)n(wing)f(e)o(xample)f(sho)n(ws)j(a)f(v)o(ery)f +(simple)i(DTD,)f(and)f(a)i(corresponding)c(document)h(instance.)h(The) +396 3391 y(document)f(is)k(structured)c(such)i(that)h(it)f(consists)h +(of)f(sections,)g(and)g(that)g(sections)g(consist)h(of)f(paragraphs,)d +(and)j(that)396 3499 y(paragraphs)e(contain)h(plain)h(te)o(xt:)396 +3679 y Fq()396 +3777 y()396 3874 +y()396 4065 y Fv(The)20 +b(follo)n(wing)f(document)f(is)j(an)f(instance)g(of)g(this)h(DTD:)396 +4245 y Fq()396 +4342 y()396 +4439 y()486 4536 y(
)576 4633 y(This)e(is) +i(a)h(paragraph)e(of)i(the)f(first)g(section.)576 +4731 y(This)e(is)i(another)g(paragraph)f(of)i(the)f(first)g +(section.)486 4828 y(
)p Black 3839 +5278 a Fr(7)p Black eop +%%Page: 8 8 +8 7 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 486 579 a Fq(
)576 676 y(This)42 +b(is)i(the)h(only)f(paragraph)f(of)i(the)f(second)g +(section.)486 773 y(
)396 870 y(
)396 +1061 y Fv(As)21 b(in)g(HTML)f(\(and,)f(of)h(course,)f(in)h(grand-f)o +(ather)d(SGML\),)j(the)g("pieces")g(of)g(the)g(document)f(are)h +(delimited)f(by)396 1169 y(element)h(braces,)f(i.e.)i(such)f(a)g(piece) +g(be)o(gins)f(with)i Fo(<)p Fq(name-of-the-type-of-the-piece)p +Fo(>)15 b Fv(and)20 b(ends)g(with)396 1277 y Fo(<)p Fq +(/name-of-the-type-of-the-piece)p Fo(>)p Fv(,)15 b(and)20 +b(the)g(pieces)g(are)g(called)g Fr(elements)p Fv(.)g(Unlik)o(e)g(HTML)g +(and)396 1385 y(SGML,)g(both)g(start)g(tags)h(and)f(end)f(tags)i +(\(i.e.)f(the)g(delimiters)g(written)g(in)g(angle)g(brack)o(ets\))f +(can)h(ne)n(v)o(er)f(be)h(left)g(out.)396 1493 y(F)o(or)g(e)o(xample,)f +(HTML)h(calls)h(the)f(paragraphs)e(simply)i Fq(p)p Fv(,)g(and)f +(because)h(paragraphs)e(ne)n(v)o(er)h(contain)g(paragraphs,)f(a)396 +1601 y(sequence)h(of)h(se)n(v)o(eral)g(paragraphs)e(can)i(be)g(written) +g(as:)396 1781 y Fq(

First)44 b(paragraph)396 1878 +y(

Second)g(paragraph)396 2069 y Fv(This)21 b(is)g(not)f(possible)g +(in)g(XML;)g(continuing)e(our)i(e)o(xample)e(abo)o(v)o(e)h(we)h(must)h +(al)o(w)o(ays)f(write)396 2249 y Fq(First)42 +b(paragraph)396 2346 y(Second)g +(paragraph)396 2537 y Fv(The)20 b(rationale)f(behind)g +(that)h(is)i(to)e(\(1\))f(simplify)h(the)g(de)n(v)o(elopment)d(of)j +(XML)h(parsers)f(\(you)e(need)i(not)g(con)m(v)o(ert)e(the)396 +2645 y(DTD)j(into)f(a)g(deterministic)f(\002nite)i(automaton)d(which)i +(is)h(required)d(to)j(detect)f(omitted)f(tags\),)h(and)g(to)g(\(2\))g +(mak)o(e)f(it)396 2753 y(possible)h(to)h(parse)e(the)i(document)d +(independent)f(of)j(whether)f(the)i(DTD)f(is)h(kno)n(wn)e(or)h(not.)396 +2903 y(The)g(\002rst)h(line)f(of)g(our)g(sample)g(document,)396 +3083 y Fq()396 +3274 y Fv(is)21 b(the)e(so-called)g Fr(XML)h(declar)o(ation)p +Fv(.)d(It)j(e)o(xpresses)e(that)i(the)f(document)f(follo)n(ws)h(the)g +(con)m(v)o(entions)e(of)i(XML)g(v)o(ersion)396 3382 y(1.0,)h(and)f +(that)h(the)h(document)d(is)j(encoded)d(using)i(characters)f(from)g +(the)i(ISO-8859-1)c(character)i(set)i(\(often)e(kno)n(wn)396 +3490 y(as)i("Latin)e(1",)g(mostly)h(used)f(in)h(W)-7 +b(estern)20 b(Europe\).)d(Although)h(the)i(XML)g(declaration)e(is)i +(not)g(mandatory)-5 b(,)16 b(it)21 b(is)f(good)396 3598 +y(style)h(to)f(include)f(it;)i(e)n(v)o(erybody)c(sees)k(at)g(the)f +(\002rst)h(glance)f(that)g(the)g(document)e(uses)j(XML)f(markup)f(and)g +(not)h(the)396 3706 y(similar)n(-looking)e(HTML)i(and)g(SGML)g(markup)f +(languages.)f(If)i(you)g(omit)g(the)g(XML)g(declaration,)e(the)j +(parser)e(will)396 3813 y(assume)h(that)h(the)f(document)e(is)j +(encoded)e(as)i(UTF-8)e(or)h(UTF-16)f(\(there)h(is)h(a)g(rule)e(that)i +(mak)o(es)f(it)h(possible)f(to)396 3921 y(distinguish)f(between)h +(UTF-8)g(and)f(UTF-16)g(automatically\);)g(these)h(are)g(encodings)f +(of)h(Unicode')-5 b(s)19 b(uni)n(v)o(ersal)396 4029 y(character)g(set.) 
+i(\(Note)f(that)g(PXP,)h(unlik)o(e)e(its)i(predecessor)e("Markup",)f +(fully)i(supports)f(Unicode.\))396 4179 y(The)h(second)f(line,)396 +4359 y Fq()396 +4550 y Fv(names)20 b(the)g(DTD)h(that)f(is)h(going)e(to)h(be)g(used)g +(for)g(the)g(rest)h(of)f(the)g(document.)e(In)i(general,)f(it)i(is)g +(possible)f(that)g(the)396 4658 y(DTD)h(consists)f(of)g(tw)o(o)h +(parts,)f(the)g(so-called)f(e)o(xternal)g(and)h(the)g(internal)f +(subset.)h("External")f(means)h(that)g(the)h(DTD)396 +4766 y(e)o(xists)g(as)g(a)f(second)g(\002le;)h("internal")e(means)h +(that)g(the)g(DTD)h(is)g(included)d(in)j(the)f(same)g(\002le.)h(In)f +(this)g(e)o(xample,)f(there)p Black 3842 5278 a Fr(8)p +Black eop +%%Page: 9 9 +9 8 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fv(is)g(only)f(an)g(e)o(xternal)f(subset,)h(and)g(the)g +(system)g(identi\002er)g("simple.dtd")e(speci\002es)j(where)f(the)g +(DTD)g(\002le)h(can)f(be)396 687 y(found.)e(System)j(identi\002ers)f +(are)g(interpreted)e(as)j(URLs;)g(for)f(instance)g(this)g(w)o(ould)g +(be)g(le)o(gal:)396 867 y Fq()396 1058 y Fv(Please)21 +b(note)f(that)g(PXP)h(cannot)e(interpret)g(HTTP)i(identi\002ers)e(by)h +(def)o(ault,)f(b)n(ut)i(it)g(is)g(possible)f(to)g(change)f(the)396 +1166 y(interpretation)f(of)i(system)h(identi\002ers.)396 +1315 y(The)f(w)o(ord)g(immediately)f(follo)n(wing)f Fq(DOCTYPE)i +Fv(determines)f(which)g(of)h(the)g(declared)f(element)h(types)g(\(here) +396 1423 y("document",)e("section",)h(and)h("paragraph"\))d(is)k(used)f +(for)g(the)g(outermost)f(element,)g(the)h Fr(r)l(oot)h(element)q +Fv(.)f(In)g(this)396 1531 y(e)o(xample)f(it)i(is)g Fq(document)f +Fv(because)f(the)h(outermost)f(element)h(is)h(delimited)e(by)h +Fo(<)p Fq(document)p Fo(>)f Fv(and)396 1639 y Fo(<)p +Fq(/document)p Fo(>)p Fv(.)396 1789 y(The)h(DTD)g(consists)h(of)f +(three)g(declarations)f(for)g(element)h(types:)g Fq(document)p +Fv(,)f Fq(section)p Fv(,)g(and)h Fq(paragraph)p Fv(.)f(Such)396 +1896 
y(a)i(declaration)d(has)j(tw)o(o)f(parts:)396 2077 +y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(content-model)p Fo(>)396 +2268 y Fv(The)20 b(content)f(model)h(is)h(a)f(re)o(gular)f(e)o +(xpression)g(which)g(describes)h(the)g(possible)g(inner)f(structure)h +(of)g(the)g(element.)396 2376 y(Here,)g Fq(document)f +Fv(contains)h(one)g(or)g(more)f(sections,)h(and)g(a)g +Fq(section)g Fv(contains)f(one)h(or)g(more)f(paragraphs.)f(Note)396 +2483 y(that)j(these)f(tw)o(o)g(element)g(types)g(are)g(not)g(allo)n +(wed)f(to)i(contain)e(arbitrary)g(te)o(xt.)g(Only)h(the)g +Fq(paragraph)g Fv(element)f(type)396 2591 y(is)i(declared)e(such)h +(that)h(parsed)e(character)g(data)h(\(indicated)f(by)h(the)g(symbol)f +Fq(#PCDATA)p Fv(\))g(is)i(permitted.)396 2741 y(See)g(belo)n(w)e(for)h +(a)h(detailed)e(discussion)h(of)g(content)f(models.)-2 +3110 y Fp(1.1.2.)35 b(XML)e(par)n(ser)n(s)h(and)g(pr)n(ocessor)n(s)396 +3278 y Fv(XML)20 b(documents)f(are)h(human-readable,)c(b)n(ut)21 +b(this)f(is)h(not)f(the)h(main)e(purpose)g(of)h(this)h(language.)d(XML) +i(has)g(been)396 3386 y(designed)f(such)h(that)g(documents)f(can)h(be)g +(read)g(by)f(a)i(program)d(called)i(an)g Fr(XML)h(par)o(ser)r +Fv(.)f(The)g(parser)g(checks)f(that)396 3494 y(the)h(document)f(is)i +(well-formatted,)d(and)h(it)i(represents)f(the)g(document)e(as)j +(objects)f(of)g(the)g(programming)d(language.)396 3602 +y(There)j(are)g(tw)o(o)g(aspects)h(when)e(checking)g(the)h(document:)e +(First,)j(the)f(document)e(must)j(follo)n(w)e(some)h(basic)396 +3710 y(syntactic)g(rules,)g(such)g(as)h(that)f(tags)h(are)f(written)g +(in)g(angle)g(brack)o(ets,)f(that)h(for)g(e)n(v)o(ery)f(start)h(tag)h +(there)e(must)i(be)f(a)396 3818 y(corresponding)d(end)j(tag)g(and)f(so) +i(on.)f(A)g(document)e(respecting)h(these)i(rules)f(is)h +Fr(well-formed)r Fv(.)f(Second,)f(the)396 3926 y(document)f(must)j +(match)e(the)i(DTD)f(in)g(which)g(case)h(the)f(document)e(is)j +Fr(valid)r Fv(.)f(Man)o(y)f(parsers)h(check)f(only)h(on)396 +4034 
y(well-formedness)e(and)i(ignore)f(the)h(DTD;)h(PXP)g(is)g +(designed)e(such)g(that)i(it)g(can)f(e)n(v)o(en)f(v)n(alidate)g(the)i +(document.)396 4183 y(A)g(parser)f(does)f(not)h(mak)o(e)g(a)h(sensible) +f(application,)e(it)j(only)f(reads)g(XML)g(documents.)e(The)i(whole)g +(application)396 4291 y(w)o(orking)f(with)h(XML-formatted)e(data)i(is)h +(called)f(an)g Fr(XML)h(pr)l(ocessor)r Fv(.)f(Often)g(XML)g(processors) +f(con)m(v)o(ert)396 4399 y(documents)g(into)h(another)e(format,)h(such) +h(as)h(HTML)f(or)g(Postscript.)g(Sometimes)g(processors)f(e)o(xtract)g +(data)h(of)g(the)396 4507 y(documents)f(and)g(output)g(the)i(processed) +e(data)h(again)f(XML-formatted.)e(The)j(parser)g(can)g(help)f(the)i +(application)396 4615 y(processing)e(the)h(document;)f(for)g(e)o +(xample)g(it)i(can)f(pro)o(vide)e(means)i(to)g(access)h(the)f(document) +e(in)j(a)f(speci\002c)h(manner)-5 b(.)396 4723 y(PXP)21 +b(supports)e(an)i(object-oriented)c(access)k(layer)e(specially)-5 +b(.)p Black 3842 5278 a Fr(9)p Black eop +%%Page: 10 10 +10 9 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black -2 583 a Fp(1.1.3.)35 b(Discussion)396 751 y Fv(As)21 +b(we)g(ha)n(v)o(e)e(seen,)h(there)g(are)g(tw)o(o)h(le)n(v)o(els)f(of)g +(description:)f(On)h(the)g(one)g(hand,)f(XML)h(can)g(de\002ne)f(rules)i +(about)e(the)396 859 y(format)g(of)h(a)h(document)d(\(the)i(DTD\),)g +(on)f(the)i(other)e(hand,)g(XML)h(e)o(xpresses)g(structured)f +(documents.)f(There)h(are)h(a)396 967 y(number)f(of)h(possible)f +(applications:)p Black 396 1199 a Ft(\225)p Black 60 +w Fv(XML)i(can)f(be)g(used)g(to)g(e)o(xpress)f(structured)g(te)o(xts.)h +(Unlik)o(e)g(HTML,)g(there)g(is)h(no)e(canonical)g(interpretation;)g +(one)479 1307 y(w)o(ould)h(ha)n(v)o(e)f(to)i(write)f(a)h(back)o(end)d +(for)i(the)g(DTD)g(that)h(translates)f(the)g(structured)f(te)o(xts)h +(into)g(a)h(format)e(that)479 1415 y(e)o(xisting)h(bro)n(wsers,)f +(printers)g(etc.)i(understand.)c(The)j(adv)n(antage)e(of)i(a)h 
+(self-de\002ned)e(document)f(format)h(is)i(that)f(it)479 +1523 y(is)h(possible)f(to)h(design)e(the)h(format)f(in)i(a)f(more)g +(problem-oriented)c(w)o(ay)-5 b(.)20 b(F)o(or)f(e)o(xample,)g(if)h(the) +h(task)f(is)h(to)g(e)o(xtract)479 1631 y(reports)f(from)f(a)h +(database,)g(one)f(can)h(use)h(a)f(DTD)h(that)f(re\003ects)h(the)f +(structure)f(of)h(the)g(report)f(or)h(the)g(database.)g(A)479 +1739 y(possible)g(approach)e(w)o(ould)i(be)g(to)g(ha)n(v)o(e)g(an)g +(element)f(type)h(for)g(e)n(v)o(ery)f(database)g(table)h(and)g(for)g(e) +n(v)o(ery)e(column.)479 1847 y(Once)i(the)g(DTD)h(has)f(been)g +(designed,)e(the)j(report)e(procedure)e(can)j(be)g(splitted)h(up)e(in)i +(a)f(part)g(that)h(selects)g(the)479 1955 y(database)f(ro)n(ws)g(and)g +(outputs)f(them)h(as)h(an)f(XML)g(document)e(according)g(to)j(the)f +(DTD,)g(and)g(in)g(a)g(part)g(that)479 2063 y(translates)h(the)f +(document)e(into)i(other)f(formats.)g(Of)i(course,)e(the)h(latter)h +(part)e(can)h(be)h(solv)o(ed)e(in)h(a)h(generic)e(w)o(ay)-5 +b(,)479 2170 y(e.g.)20 b(there)g(may)f(be)h(con\002gurable)e(back)o +(ends)h(for)h(all)g(DTDs)h(that)f(follo)n(w)g(the)g(approach)e(and)i +(ha)n(v)o(e)f(element)h(types)479 2278 y(for)g(tables)g(and)g(columns.) 
+479 2428 y(XML)h(plays)f(the)g(role)g(of)g(a)g(con\002gurable)e +(intermediate)h(format.)g(The)g(database)h(e)o(xtraction)e(function)h +(can)h(be)479 2536 y(written)g(without)g(ha)n(ving)f(to)h(kno)n(w)f +(the)h(details)h(of)f(typesetting;)f(the)h(back)o(ends)f(can)h(be)g +(written)g(without)g(ha)n(ving)479 2644 y(to)h(kno)n(w)e(the)h(details) +h(of)e(the)i(database.)479 2793 y(Of)g(course,)e(there)h(are)g +(traditional)f(solutions.)g(One)h(can)g(de\002ne)g(an)g(ad)g(hoc)g +(intermediate)e(te)o(xt)j(\002le)f(format.)f(This)479 +2901 y(disadv)n(antage)f(is)k(that)e(there)g(are)g(no)f(names)h(for)g +(the)g(pieces)g(of)g(the)g(format,)f(and)h(that)g(such)g(formats)g +(usually)f(lack)479 3009 y(of)h(documentation)d(because)j(of)g(this.)g +(Another)f(solution)g(w)o(ould)h(be)g(to)g(ha)n(v)o(e)g(a)h(binary)e +(representation,)e(either)j(as)479 3117 y(language-dependent)c(or)k +(language-independent)14 b(structure)20 b(\(e)o(xample)e(of)i(the)g +(latter)h(can)f(be)g(found)e(in)j(RPC)479 3225 y(implementations\).)d +(The)i(disadv)n(antage)e(is)j(that)f(it)h(is)g(harder)e(to)i(vie)n(w)f +(such)g(representations,)e(one)h(has)i(to)f(write)479 +3333 y(pretty)g(printers)f(for)h(this)g(purpose.)f(It)h(is)h(also)g +(more)e(dif)n(\002cult)h(to)g(enter)g(test)h(data;)f(XML)g(is)h(plain)f +(te)o(xt)g(that)h(can)f(be)479 3441 y(written)g(using)g(an)g(arbitrary) +f(editor)g(\(Emacs)h(has)g(e)n(v)o(en)f(a)i(good)e(XML)h(mode,)f +(PSGML\).)h(All)h(these)f(alternati)n(v)o(es)479 3549 +y(suf)n(fer)g(from)f(a)h(missing)g(structure)g(check)o(er)m(,)e(i.e.)i +(the)h(programs)d(processing)h(these)h(formats)f(usually)h(do)g(not)479 +3657 y(check)g(the)g(input)f(\002le)i(or)f(input)g(object)f(in)i +(detail;)f(XML)g(parsers)g(check)f(the)h(syntax)g(of)g(the)g(input)g +(\(the)f(so-called)479 3765 y(well-formedness)f(check\),)h(and)h(the)g +(adv)n(anced)e(parsers)i(lik)o(e)g(PXP)h(e)n(v)o(en)f(v)o(erify)e(that) +j(the)f(structure)f(matches)h(the)479 3872 y(DTD)h(\(the)f(so-called)f 
+(v)n(alidation\).)p Black 396 4022 a Ft(\225)p Black +60 w Fv(XML)i(can)f(be)g(used)g(as)g(con\002gurable)e(communication)g +(language.)g(A)i(fundamental)e(problem)h(of)h(e)n(v)o(ery)479 +4130 y(communication)e(is)j(that)f(sender)f(and)h(recei)n(v)o(er)f +(must)h(follo)n(w)g(the)g(same)g(con)m(v)o(entions)e(about)h(the)h +(language.)e(F)o(or)479 4238 y(data)i(e)o(xchange,)e(the)i(question)f +(is)j(usually)d(which)h(data)g(records)f(and)h(\002elds)g(are)g(a)n(v)n +(ailable,)g(ho)n(w)g(the)o(y)f(are)479 4346 y(syntactically)h +(composed,)e(and)i(which)f(v)n(alues)h(are)g(possible)g(for)g(the)g(v)n +(arious)f(\002elds.)h(Similar)h(questions)e(arise)479 +4454 y(for)h(te)o(xt)g(document)e(e)o(xchange.)g(XML)i(does)g(not)g +(answer)g(these)g(problems)f(completely)-5 b(,)18 b(b)n(ut)i(it)h +(reduces)e(the)479 4562 y(number)g(of)h(ambiguities)f(for)g(such)h(con) +m(v)o(entions:)e(The)i(outlines)f(of)h(the)g(syntax)g(are)g +(speci\002ed)g(by)g(the)g(DTD)g(\(b)n(ut)479 4669 y(not)g(necessarily)g +(the)g(details\),)g(and)g(XML)g(introduces)e(canonical)h(names)h(for)g +(the)g(components)e(of)i(documents)479 4777 y(such)g(that)h(it)f(is)i +(simpler)d(to)i(describe)e(the)h(rest)h(of)f(the)g(syntax)g(and)f(the)h +(semantics)h(informally)-5 b(.)p Black 3800 5278 a Fr(10)p +Black eop +%%Page: 11 11 +11 10 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black Black 396 579 a Ft(\225)p Black 60 w Fv(XML)f(is)g(a)g(data)f +(storage)g(format.)f(Currently)-5 b(,)17 b(e)n(v)o(ery)h(softw)o(are)h +(product)f(tends)h(to)h(use)f(its)i(o)n(wn)d(w)o(ay)i(to)f(store)h +(data;)479 687 y(commercial)f(softw)o(are)h(often)f(does)h(not)g +(describe)f(such)h(formats,)f(and)h(it)h(is)g(a)g(pain)e(to)i(inte)o +(grate)e(such)h(softw)o(are)479 795 y(into)g(a)g(bigger)f(project.)f +(XML)i(can)g(help)f(to)h(impro)o(v)o(e)e(this)j(situation)e(when)g(se)n +(v)o(eral)g(applications)g(share)h(the)g(same)479 903 +y(syntax)g(of)g(data)g(\002les.)h(DTDs)f(are)g(then)g(neutral)g 
+(instances)g(that)g(check)f(the)h(format)g(of)f(data)i(\002les)g +(independent)c(of)479 1011 y(applications.)-2 1512 y +Fx(1.2.)39 b(Highlights)e(of)i(XML)396 1692 y Fv(This)21 +b(section)f(e)o(xplains)f(man)o(y)g(of)h(the)g(features)f(of)h(XML,)g +(b)n(ut)h(not)e(all,)i(and)f(some)g(features)f(not)h(in)g(detail.)g(F)o +(or)g(a)396 1800 y(complete)f(description,)g(see)i(the)f(XML)g +(speci\002cation)396 1908 y(\(http://www)-5 b(.w3.or)o +(g/TR/1998/REC-xml-)o(19)o(98)o(02)o(10)o(.htm)o(l\).)-2 +2236 y Fp(1.2.1.)35 b(The)f(DTD)g(and)g(the)f(instance)396 +2404 y Fv(The)20 b(DTD)g(contains)g(v)n(arious)f(declarations;)g(in)h +(general)f(you)h(can)g(only)f(use)i(a)f(feature)f(if)i(you)e(ha)n(v)o +(e)h(pre)n(viously)396 2512 y(declared)f(it.)i(The)f(document)e +(instance)i(\002le)h(may)e(contain)g(the)i(full)f(DTD,)g(b)n(ut)g(it)h +(is)g(also)g(possible)f(to)g(split)h(the)f(DTD)396 2619 +y(into)g(an)g(internal)g(and)f(an)h(e)o(xternal)f(subset.)h(A)h +(document)d(must)j(be)o(gin)e(as)h(follo)n(ws)g(if)h(the)f(full)g(DTD)g +(is)h(included:)396 2800 y Fo(<)p Fq(?xml)44 b(version="1.0")f +(encoding=")p Fn(Your)f(encoding)t Fq("?)p Fo(>)396 2897 +y(<)p Fq(!DOCTYPE)h Fn(root)i Fq([)486 2994 y Fn(Declarations)396 +3091 y Fq(])p Fo(>)396 3282 y Fv(These)20 b(declarations)f(are)h +(called)g(the)h Fr(internal)e(subset)q Fv(.)i(Note)f(that)g(the)g +(usage)g(of)g(entities)h(and)e(conditional)g(sections)396 +3390 y(is)i(restricted)f(within)g(the)g(internal)g(subset.)396 +3539 y(If)g(the)h(declarations)d(are)j(located)e(in)h(a)h(dif)n(ferent) +e(\002le,)h(you)f(can)h(refer)g(to)g(this)h(\002le)g(as)g(follo)n(ws:) +396 3720 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p +Fn(Your)f(encoding)t Fq("?)p Fo(>)396 3817 y(<)p Fq(!DOCTYPE)h +Fn(root)i Fq(SYSTEM)e(")p Fn(file)h(name)p Fq(")p Fo(>)396 +4008 y Fv(The)20 b(declarations)f(in)h(the)h(\002le)f(are)h(called)f +(the)g Fr(e)n(xternal)g(subset)q Fv(.)g(The)g(\002le)h(name)f(is)h +(called)f(the)g Fr(system)h(identi\002er)r 
Fv(.)e(It)396 +4116 y(is)i(also)g(possible)f(to)g(refer)g(to)g(the)g(\002le)h(by)f(a)g +(so-called)g Fr(public)f(identi\002er)r Fv(,)g(b)n(ut)i(most)f(XML)g +(applications)f(w)o(on')o(t)g(use)396 4223 y(this)i(feature.)396 +4373 y(Y)-9 b(ou)20 b(can)g(also)g(specify)g(both)f(internal)h(and)f(e) +o(xternal)g(subsets.)i(In)e(this)i(case,)g(the)f(declarations)f(of)h +(both)f(subsets)i(are)396 4481 y(mix)o(ed,)e(and)h(if)g(there)g(are)g +(con\003icts,)g(the)g(declaration)f(of)h(the)g(internal)f(subset)i(o)o +(v)o(errides)d(those)i(of)g(the)g(e)o(xternal)396 4589 +y(subset)h(with)f(the)g(same)h(name.)e(This)h(looks)g(as)h(follo)n(ws:) +396 4769 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p +Fn(Your)f(encoding)t Fq("?)p Fo(>)396 4866 y(<)p Fq(!DOCTYPE)h +Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)p +Black 3800 5278 a Fr(11)p Black eop +%%Page: 12 12 +12 11 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 486 579 a Fn(Declarations)396 676 y Fq(])p Fo(>)396 +909 y Fv(The)f(XML)g(declaration)f(\(the)h(string)g(be)o(ginning)d +(with)k Fo(<)p Fq(?xml)e Fv(and)h(ending)f(at)i Fq(?)p +Fo(>)p Fv(\))f(should)f(specify)g(the)h(encoding)396 +1016 y(of)g(the)g(\002le.)h(Common)e(v)n(alues)h(are)g(UTF-8,)f(and)h +(the)g(ISO-8859)e(series)j(of)f(character)f(sets.)i(Note)f(that)g(e)n +(v)o(ery)f(\002le)396 1124 y(parsed)h(by)f(the)i(XML)f(processor)f(can) +h(be)o(gin)f(with)h(an)g(XML)h(declaration)d(and)i(that)g(e)n(v)o(ery)f +(\002le)i(may)e(ha)n(v)o(e)h(its)h(o)n(wn)396 1232 y(encoding.)396 +1382 y(The)f(name)g(of)g(the)g(root)f(element)h(must)g(be)g(mentioned)f +(directly)g(after)h(the)g Fq(DOCTYPE)g Fv(string.)f(This)i(means)e +(that)i(a)396 1490 y(full)f(document)f(instance)g(looks)h(lik)o(e)396 +1670 y Fo(<)p Fq(?xml)44 b(version="1.0")f(encoding=")p +Fn(Your)f(encoding)t Fq("?)p Fo(>)396 1767 y(<)p Fq(!DOCTYPE)h +Fn(root)89 b Fq(SYSTEM)44 b(")p Fn(file)g(name)p Fq(")g([)486 +1864 y Fn(Declarations)396 1961 y Fq(])p Fo(>)396 2156 +y(<)p 
Fn(root)p Fo(>)486 2253 y Fn(inner)g(contents)396 +2350 y Fo(<)p Fq(/)p Fn(root)p Fo(>)-2 2802 y Fp(1.2.2.)35 +b(Reser)q(ved)h(c)o(haracter)n(s)396 2970 y Fv(Some)20 +b(characters)f(are)i(generally)d(reserv)o(ed)h(to)h(indicate)g(markup)e +(such)i(that)g(the)o(y)g(cannot)f(be)h(used)g(for)g(character)396 +3078 y(data.)g(These)g(characters)f(are)h Fm(<)p Fv(,)h +Fm(>)p Fv(,)f(and)f(&.)h(Furthermore,)e(single)i(and)g(double)e(quotes) +i(are)g(sometimes)g(reserv)o(ed.)396 3186 y(If)g(you)g(w)o(ant)g(to)g +(include)f(such)h(a)h(character)e(as)i(character)m(,)d(write)j(it)f(as) +h(follo)n(ws:)p Black 396 3473 a Ft(\225)p Black 60 w +Fq(<)f Fv(instead)g(of)g Fm(<)p Black 396 3581 a Ft(\225)p +Black 60 w Fq(>)g Fv(instead)g(of)g Fm(>)p Black 396 +3689 a Ft(\225)p Black 60 w Fq(&)g Fv(instead)g(of)g(&)p +Black 396 3797 a Ft(\225)p Black 60 w Fq(')g Fv(instead)g(of)g(')p +Black 396 3905 a Ft(\225)p Black 60 w Fq(")g Fv(instead)g(of)g(") +396 4054 y(All)h(other)e(characters)h(are)g(free)g(in)g(the)g(document) +e(instance.)i(It)g(is)i(possible)d(to)i(include)e(a)i(character)e(by)g +(its)j(position)396 4162 y(in)f(the)f(Unicode)f(alphabet:)396 +4342 y Fq(&#)p Fn(n)p Fq(;)396 4533 y Fv(where)h Fl(n)g +Fv(is)i(the)e(decimal)f(number)g(of)h(the)g(character)-5 +b(.)19 b(Alternati)n(v)o(ely)-5 b(,)18 b(you)h(can)h(specify)g(the)g +(character)f(by)h(its)396 4641 y(he)o(xadecimal)e(number:)396 +4822 y Fq(&#x)p Fn(n)p Fq(;)p Black 3800 5278 a Fr(12)p +Black eop +%%Page: 13 13 +13 12 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fv(In)f(the)g(scope)g(of)g(declarations,)f(the)h +(character)f(\045)i(is)g(no)f(longer)f(free.)g(T)-7 b(o)20 +b(include)g(it)h(as)f(character)m(,)f(you)g(must)h(use)396 +687 y(the)g(notations)g Fq(%)g Fv(or)f Fq(%)p +Fv(.)396 836 y(Note)h(that)h(besides)f(<,)g(>,)g(&,)f +(',)g(and)h(")f(there)h(are)g(no)g(prede\002nes)f(character)g +(entities.)h(This)396 944 y(is)h(dif)n(ferent)e(from)g(HTML)h(which)g 
+(de\002nes)g(a)g(list)i(of)d(characters)h(that)g(can)g(be)g(referenced) +e(by)i(name)f(\(e.g.)h(ä)396 1052 y(for)g(\344\);)g(ho)n(we)n(v)o +(er)m(,)e(if)i(you)g(prefer)e(named)i(characters,)f(you)g(can)h +(declare)f(such)h(entities)h(yourself)e(\(see)h(belo)n(w\).)-2 +1422 y Fp(1.2.3.)35 b(Elements)g(and)f(ELEMENT)e(dec)n(larations)396 +1589 y Fv(Elements)20 b(structure)f(the)h(document)f(instance)g(in)i(a) +f(hierarchical)f(w)o(ay)-5 b(.)20 b(There)f(is)i(a)g(top-le)n(v)o(el)d +(element,)i(the)g Fr(r)l(oot)396 1697 y(element)q Fv(,)g(which)g +(contains)g(a)g(sequence)f(of)h(inner)g(elements)f(and)h(character)f +(sections.)h(The)g(inner)f(elements)h(are)396 1805 y(structured)f(in)h +(the)f(same)h(w)o(ay)-5 b(.)20 b(Ev)o(ery)e(element)h(has)h(an)g +Fr(element)f(type)p Fv(.)h(The)f(be)o(ginning)f(of)h(the)h(element)f +(is)i(indicated)396 1913 y(by)f(a)h Fr(start)g(ta)o(g)p +Fv(,)e(written)396 2093 y Fo(<)p Fn(element-type)p Fo(>)396 +2284 y Fv(and)h(the)g(element)g(continues)f(until)h(the)g +(corresponding)d Fr(end)i(ta)o(g)h Fv(is)h(reached:)396 +2465 y Fo(<)p Fq(/)p Fn(element-type)p Fo(>)396 2655 +y Fv(In)f(XML,)f(it)i(is)f(not)g(allo)n(wed)f(to)h(omit)f(start)i(or)e +(end)g(tags,)h(e)n(v)o(en)f(if)h(the)g(DTD)g(w)o(ould)f(permit)g(this.) 
+h(Note)g(that)g(there)f(are)396 2763 y(no)h(special)g(rules)g(ho)n(w)g +(to)g(interpret)g(spaces)g(or)g(ne)n(wlines)g(near)f(start)i(or)f(end)g +(tags;)g(all)h(spaces)f(and)g(ne)n(wlines)g(count.)396 +2913 y(Ev)o(ery)f(element)h(type)f(must)i(be)f(declared)f(before)f(it)j +(can)f(be)g(used.)g(The)g(declaration)f(consists)h(of)g(tw)o(o)h +(parts:)f(the)396 3021 y(ELEMENT)f(declaration)f(describes)h(the)h +(content)f(model,)f(i.e.)i(which)f(inner)g(elements)g(are)h(allo)n +(wed;)f(the)h(A)-9 b(TTLIST)396 3129 y(declaration)19 +b(describes)h(the)g(attrib)n(utes)g(of)g(the)g(element.)396 +3278 y(An)g(element)g(can)g(simply)g(allo)n(w)g(e)n(v)o(erything)e(as)i +(content.)f(This)i(is)g(written:)396 3458 y Fo(<)p Fq(!ELEMENT)43 +b Fn(name)i Fq(ANY)p Fo(>)396 3649 y Fv(On)20 b(the)h(opposite,)e(an)h +(element)f(can)h(be)g(forced)f(to)i(be)f(empty;)f(declared)g(by:)396 +3829 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(EMPTY)p Fo(>)396 +4020 y Fv(Note)20 b(that)h(there)e(is)j(an)e(abbre)n(viated)e(notation) +h(for)g(empty)g(element)h(instances:)g Fo(<)p Fn(name)p +Fq(/)p Fo(>)p Fv(.)396 4170 y(There)g(are)g(tw)o(o)g(more)g +(sophisticated)f(forms)g(of)h(declarations:)f(so-called)h +Fr(mixed)g(declar)o(ations)p Fv(,)e(and)i Fr(r)m(e)m(gular)396 +4278 y(e)n(xpr)m(essions)p Fv(.)g(An)h(element)e(with)i(mix)o(ed)e +(content)g(contains)g(character)g(data)h(interspersed)f(with)i(inner)e +(elements,)396 4386 y(and)h(the)g(set)h(of)f(allo)n(wed)g(inner)f +(elements)h(can)g(be)g(speci\002ed.)g(In)f(contrast)h(to)g(this,)h(a)g +(re)o(gular)d(e)o(xpression)396 4494 y(declaration)h(does)h(not)g(allo) +n(w)g(character)f(data,)h(b)n(ut)g(the)g(inner)f(elements)h(can)g(be)g +(described)f(by)h(the)g(more)g(po)n(werful)396 4601 y(means)g(of)g(re)o +(gular)f(e)o(xpressions.)396 4751 y(A)i(declaration)e(for)g(mix)o(ed)g +(content)g(looks)h(as)h(follo)n(ws:)p Black 3800 5278 +a Fr(13)p Black eop +%%Page: 14 14 +14 13 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a 
Fo(<)p Fq(!ELEMENT)43 b Fn(name)i Fq(\(#PCDATA)e(|)i +Fn(element)1892 609 y Fk(1)1962 579 y Fq(|)g(...)f(|)h +Fn(element)2636 609 y Fk(n)2707 579 y Fq(\)*)p Fo(>)396 +770 y Fv(or)20 b(if)h(you)e(do)h(not)g(w)o(ant)g(to)g(allo)n(w)g(an)o +(y)g(inner)f(element,)h(simply)396 950 y Fo(<)p Fq(!ELEMENT)43 +b Fn(name)i Fq(\(#PCDATA\))p Fo(>)396 1279 y Fj(Example)479 +1426 y Fi(If)19 b(element)g(type)g Fh(q)g Fi(is)g(declared)h(as)479 +1596 y Fh()479 +1776 y Fi(this)19 b(is)f(a)h(le)o(gal)g(instance:)479 +1947 y Fh(This)43 b(is)e(character)j(datawith)h(inner) +g(elements)479 2127 y Fi(But)19 b(this)g(is)f(ille)o(gal)g(because) +i Fh(t)f Fi(has)h(not)f(been)g(enumerated)i(in)e(the)g(declaration:)479 +2297 y Fh(This)43 b(is)e(character)j(datawith)h(inner) +g(elements)396 2571 y Fv(The)20 b(other)f(form)h(uses)g(a)h(re)o +(gular)e(e)o(xpression)f(to)j(describe)e(the)h(possible)g(contents:)396 +2752 y Fo(<)p Fq(!ELEMENT)43 b Fn(name)i(regexp)p Fo(>)396 +2942 y Fv(The)20 b(follo)n(wing)f(well-kno)n(wn)f(re)o(ge)o(xp)g +(operators)h(are)h(allo)n(wed:)p Black 396 3299 a Ft(\225)p +Black 60 w Fn(element-name)p Black 396 3407 a Ft(\225)p +Black 60 w Fq(\()p Fn(subexpr)839 3437 y Fk(1)910 3407 +y Fq(,)g Fv(...)g Fq(,)45 b Fn(subexpr)1463 3437 y Fk(n)1533 +3407 y Fq(\))p Black 396 3515 a Ft(\225)p Black 60 w +Fq(\()p Fn(subexpr)839 3545 y Fk(1)910 3515 y Fq(|)20 +b Fv(...)g Fq(|)45 b Fn(subexpr)1463 3545 y Fk(n)1533 +3515 y Fq(\))p Black 396 3623 a Ft(\225)p Black 60 w +Fn(subexpr)s Fq(*)p Black 396 3731 a Ft(\225)p Black +60 w Fn(subexpr)s Fq(+)p Black 396 3839 a Ft(\225)p Black +60 w Fn(subexpr)s Fq(?)396 3989 y Fv(The)20 b Fq(,)h +Fv(operator)d(indicates)i(a)h(sequence)e(of)h(sub-models,)e(the)i +Fq(|)h Fv(operator)d(describes)i(alternati)n(v)o(e)f(sub-models.)f(The) +396 4096 y Fq(*)j Fv(indicates)f(zero)f(or)h(more)g(repetitions,)f(and) +g Fq(+)i Fv(one)f(or)f(more)h(repetitions.)f(Finally)-5 +b(,)19 b Fq(?)i Fv(can)f(be)g(used)g(for)f(optional)396 +4204 
y(sub-models.)g(As)i(atoms)f(the)g(re)o(ge)o(xp)e(can)i(contain)f +(names)h(of)g(elements;)g(note)g(that)g(it)h(is)g(not)f(allo)n(wed)f +(to)i(include)396 4312 y Fq(#PCDATA)p Fv(.)396 4462 y(The)f(e)o(xact)g +(syntax)f(of)h(the)g(re)o(gular)f(e)o(xpressions)g(is)i(rather)e +(strange.)h(This)g(can)g(be)g(e)o(xplained)f(best)h(by)g(a)g(list)i(of) +396 4570 y(constraints:)p Black 396 4802 a Ft(\225)p +Black 60 w Fv(The)e(outermost)f(e)o(xpression)g(must)h(not)g(be)g +Fn(element-name)p Fv(.)p Black 3800 5278 a Fr(14)p Black +eop +%%Page: 15 15 +15 14 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 479 579 a(Ille)m(gal:)e Fq()p +Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq()p Fv(.)p Black 396 728 a Ft(\225)p Black +60 w Fv(F)o(or)20 b(the)g(unary)f(operators)g Fn(subexpr)s +Fq(*)p Fv(,)g Fn(subexpr)s Fq(+)p Fv(,)g(and)g Fn(subexpr)s +Fq(?)p Fv(,)g(the)h Fn(subexpr)i Fv(must)f(not)f(be)g(again)f(an)479 +836 y(unary)g(operator)-5 b(.)479 986 y Fr(Ille)m(gal:)19 +b Fq()p Fv(;)20 b(this)h(must)f(be)g(written)g +(as)h Fq()p Fv(.)p Black 396 +1135 a Ft(\225)p Black 60 w Fv(Between)21 b Fq(\))f Fv(and)g(one)f(of)h +(the)h(unary)d(operatory)g Fq(*)p Fv(,)j Fq(+)p Fv(,)f(or)g +Fq(?)p Fv(,)g(there)g(must)g(not)g(be)g(whitespace.)479 +1285 y Fr(Ille)m(gal:)f Fq()p +Fv(;)21 b(this)f(must)h(be)f(written)g(as)h Fq()p Fv(.)p Black 396 1434 a Ft(\225)p Black +60 w Fv(There)20 b(is)h(the)f(additional)f(constraint)g(that)h(the)h +(right)e(parenthsis)g(must)i(be)f(contained)e(in)j(the)f(same)g(entity) +g(as)h(the)479 1542 y(left)g(parenthesis;)e(see)i(the)f(section)g +(about)f(parsed)h(entities)g(belo)n(w)-5 b(.)396 1733 +y(Note)20 b(that)g(there)g(is)h(another)e(restriction)g(on)h(re)o +(gular)e(e)o(xpressions)h(which)h(must)g(be)g(deterministic.)f(This)h +(means)g(that)396 1841 y(the)g(parser)g(must)g(be)g(able)g(to)h(see)g +(by)e(looking)g(at)i(the)f(ne)o(xt)f(tok)o(en)h(which)f(alternati)n(v)o +(e)g(is)i(actually)f(used,)g(or)f(whether)396 1949 
y(the)h(repetition)f +(stops.)i(The)f(reason)f(for)g(this)i(is)g(simply)f(compatability)f +(with)h(SGML)g(\(there)g(is)h(no)f(intrinsic)f(reason)396 +2057 y(for)h(this)h(rule;)e(XML)i(can)f(li)n(v)o(e)g(without)f(this)i +(restriction\).)396 2302 y Fj(Example)479 2449 y Fi(The)e(elements)g +(are)g(declared)h(as)f(follo)n(ws:)479 2620 y Fh()479 2707 y()479 2795 y()479 2882 +y()479 3062 y Fi(This)19 +b(is)f(a)h(le)o(gal)g(instance:)479 3233 y Fh(Some)44 +b(characters<)q(/q>)479 3413 y Fi(\(Note:)19 +b Fg(<)p Fh(s/)p Fg(>)g Fi(is)g(an)g(abbre)n(viation)h(for)f +Fg(<)p Fh(s)p Fg(><)p Fh(/s)p Fg(>)p Fi(.\))g(It)f(w)o(ould)i(be)f +(ille)o(gal)f(to)h(lea)o(v)o(e)g Fh()h Fi(out)f(because)h(at)f +(least)f(one)479 3510 y(instance)i(of)f Fh(s)g Fi(or)g +Fh(t)g Fi(must)g(be)g(present.)g(It)f(w)o(ould)i(be)f(ille)o(gal,)f +(too,)h(if)f(characters)i(e)o(xisted)f(outside)h(the)e +Fh(r)i Fi(element;)f(the)g(only)479 3607 y(e)o(xception)h(is)f(white)g +(space.)g(\226)g(This)f(is)h(le)o(gal,)f(too:)479 3778 +y Fh(<)q(/q>)q()-2 4230 +y Fp(1.2.4.)35 b(Attrib)n(ute)e(lists)h(and)g(A)-11 b(TTLIST)34 +b(dec)n(larations)396 4398 y Fv(Elements)20 b(may)g(ha)n(v)o(e)f +(attrib)n(utes.)h(These)g(are)g(put)g(into)g(the)g(start)h(tag)f(of)g +(an)g(element)g(as)h(follo)n(ws:)396 4578 y Fo(<)p Fn(element-name)43 +b(attribute)1444 4608 y Fk(1)1469 4578 y Fq(=")p Fn(value)1784 +4608 y Fk(1)1810 4578 y Fq(")i(...)f Fn(attribute)2484 +4608 y Fk(n)2509 4578 y Fq(=")p Fn(value)2824 4608 y +Fk(n)2850 4578 y Fq(")p Fo(>)396 4769 y Fv(Instead)20 +b(of)g Fq(")p Fn(value)1017 4799 y Fk(k)1043 4769 y Fq(")g +Fv(it)h(is)g(also)g(possible)f(to)g(use)g(single)g(quotes)g(as)h(in)f +Fq(')p Fn(value)2817 4799 y Fk(k)2843 4769 y Fq(')p Fv(.)g(Note)h(that) +f(you)f(cannot)g(use)396 4877 y(double)g(quotes)h(literally)g(within)g +(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(if)h(double)d(quotes)i(are) +g(the)g(delimiters;)g(the)g(same)p Black 3800 5278 a +Fr(15)p Black eop +%%Page: 16 16 +16 15 bop Black 3028 67 a 
Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fv(applies)f(to)h(single)f(quotes.)f(Y)-9 +b(ou)20 b(can)g(generally)e(not)i(use)g Fm(<)h Fv(and)e(&)i(as)g +(characters)e(in)h(attrib)n(ute)g(v)n(alues.)g(It)g(is)396 +687 y(possible)g(to)h(include)e(the)h(paraphrases)e(<,)j(>,)f +(&,)f(',)g(and)h(")f(\(and)g(an)o(y)g(other)h(reference)e +(to)j(a)396 795 y(general)e(entity)h(as)h(long)f(as)g(the)h(entity)f +(is)h(not)e(de\002ned)h(by)f(an)i(e)o(xternal)d(\002le\))j(as)g(well)g +(as)g(&#)p Fl(n)p Fv(;.)396 944 y(Before)f(you)f(can)h(use)h(an)f +(attrib)n(ute)g(you)f(must)h(declare)g(it.)g(An)g(A)-9 +b(TTLIST)20 b(declaration)e(looks)i(as)h(follo)n(ws:)396 +1124 y Fo(<)p Fq(!ATTLIST)43 b Fn(element-name)845 1222 +y(attribute-name)f(attribute-type)h(attribute-default)845 +1319 y Fq(...)845 1416 y Fn(attribute-name)f(attribute-type)h +(attribute-default)396 1513 y Fo(>)396 1704 y Fv(There)20 +b(are)g(a)g(lot)h(of)f(types,)f(b)n(ut)i(most)f(important)f(are:)p +Black 396 2061 a Ft(\225)p Black 60 w Fq(CDATA)p Fv(:)h(Ev)o(ery)f +(string)h(is)h(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)p +Black 396 2169 a Ft(\225)p Black 60 w Fq(NMTOKEN)p Fv(:)g(Ev)o(ery)f +(nametok)o(en)f(is)j(allo)n(wed)f(as)g(attrib)n(ute)g(v)n(alue.)g +(Nametok)o(ens)f(consist)h(\(mainly\))f(of)g(letters,)479 +2277 y(digits,)h(.,)h(:,)f(-,)g(_)h(in)f(arbitrary)f(order)-5 +b(.)p Black 396 2385 a Ft(\225)p Black 60 w Fq(NMTOKENS)p +Fv(:)20 b(A)g(space-separated)f(list)i(of)f(nametok)o(ens)e(is)k(allo)n +(wed)d(as)i(attrib)n(ute)f(v)n(alue.)396 2534 y(The)g(most)g +(interesting)g(def)o(ault)f(declarations)g(are:)p Black +396 2767 a Ft(\225)p Black 60 w Fq(#REQUIRED)p Fv(:)h(The)f(attrib)n +(ute)h(must)g(be)h(speci\002ed.)p Black 396 2874 a Ft(\225)p +Black 60 w Fq(#IMPLIED)p Fv(:)e(The)h(attrib)n(ute)f(can)g(be)h +(speci\002ed)f(b)n(ut)h(also)g(can)f(be)h(left)g(out.)f(The)g +(application)g(can)g(\002nd)g(out)h(whether)479 2982 +y(the)g(attrib)n(ute)g(w)o(as)h(present)f(or)g(not.)p +Black 396 3090 
a Ft(\225)p Black 60 w Fq(")p Fn(value)p +Fq(")g Fv(or)f Fq(')p Fn(value)p Fq(')p Fv(:)h(This)g(particular)e(v)n +(alue)i(is)g(used)g(as)h(def)o(ault)e(if)h(the)g(attrib)n(ute)g(is)g +(omitted)g(in)g(the)g(element.)396 3378 y Fj(Example)479 +3525 y Fi(This)f(is)f(a)h(v)n(alid)g(attrib)o(ute)g(declaration)g(for)g +(element)g(type)h Fh(r)p Fi(:)479 3695 y Fh()479 4137 y Fi(This)19 b(means)g(that)g Fh(x)g +Fi(is)g(a)g(required)g(attrib)o(ute)f(that)h(cannot)h(be)f(left)g(out,) +f(while)h Fh(y)g Fi(and)h Fh(z)f Fi(are)g(optional.)g(The)g(XML)g +(parser)479 4235 y(indicates)h(the)f(application)g(whether)h +Fh(y)f Fi(is)f(present)i(or)f(not,)f(b)o(ut)h(if)f Fh(z)h +Fi(is)g(missing)g(the)g(def)o(ault)h(v)n(alue)f("one)h(tw)o(o)f(three") +h(is)479 4332 y(returned)g(automatically)-5 b(.)479 4470 +y(This)19 b(is)f(a)h(v)n(alid)g(e)o(xample)h(of)f(these)g(attrib)o +(utes:)479 4641 y Fh()p Black 3798 5278 a Fr(16)p +Black eop +%%Page: 17 17 +17 16 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black -2 583 a Fp(1.2.5.)35 b(P)l(ar)n(sed)g(entities)396 +751 y Fv(Elements)20 b(describe)f(the)i(logical)e(structure)h(of)g(the) +g(document,)e(while)i Fr(entities)g Fv(determine)f(the)h(physical)g +(structure.)396 859 y(Entities)h(are)f(the)g(pieces)g(of)g(te)o(xt)g +(the)g(parser)g(operates)f(on,)h(mostly)g(\002les)h(and)f(macros.)f +(Entities)h(may)g(be)g Fr(par)o(sed)i Fv(in)396 967 y(which)e(case)h +(the)f(parser)f(reads)h(the)g(te)o(xt)h(and)e(interprets)g(it)i(as)g +(XML)g(markup,)d(or)i Fr(unpar)o(sed)h Fv(which)e(simply)h(means)396 +1075 y(that)h(the)f(data)g(of)g(the)g(entity)g(has)g(a)h(foreign)d +(format)h(\(e.g.)h(a)g(GIF)h(icon\).)396 1224 y(If)f(the)g(parsed)f +(entity)g(is)i(going)e(to)h(be)g(used)f(as)i(part)e(of)h(the)g(DTD,)g +(it)g(is)h(called)f(a)g Fr(par)o(ameter)f(entity)p Fv(.)h(Y)-9 +b(ou)19 b(can)h(declare)396 1332 y(a)h(parameter)e(entity)g(with)i(a)f +(\002x)o(ed)g(te)o(xt)g(as)h(content)e(by:)396 1512 y +Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g 
Fq(")p Fn(value)p +Fq(")p Fo(>)396 1703 y Fv(W)m(ithin)20 b(the)h(DTD,)f(you)f(can)h +Fr(r)m(efer)h(to)f Fv(this)h(entity)-5 b(,)19 b(i.e.)i(read)e(the)h(te) +o(xt)g(of)g(the)h(entity)-5 b(,)19 b(by:)396 1883 y Fq(\045)p +Fn(name)p Fq(;)396 2074 y Fv(Such)h(entities)h(beha)n(v)o(e)e(lik)o(e)h +(macros,)f(i.e.)i(when)e(the)o(y)h(are)g(referred)e(to,)i(the)g(macro)g +(te)o(xt)g(is)h(inserted)e(and)h(read)396 2182 y(instead)g(of)g(the)g +(original)f(te)o(xt.)396 2478 y Fj(Example)479 2625 y +Fi(F)o(or)g(e)o(xample,)g(you)h(can)f(declare)h(tw)o(o)f(elements)g +(with)f(the)h(same)h(content)f(model)h(by:)479 2795 y +Fh()479 +2882 y()479 2970 y()396 3202 y Fv(If)20 b(the)h(contents)e(of)h(the)g +(entity)g(are)g(gi)n(v)o(en)f(as)i(string)f(constant,)f(the)h(entity)g +(is)h(called)f(an)g Fr(internal)g Fv(entity)-5 b(.)19 +b(It)i(is)g(also)396 3310 y(possible)f(to)h(name)e(a)i(\002le)g(to)f +(be)g(used)g(as)h(content)e(\(an)h Fr(e)n(xternal)g Fv(entity\):)396 +3490 y Fo(<)p Fq(!ENTITY)44 b(\045)g Fn(name)g Fq(SYSTEM)g(")p +Fn(file)g(name)p Fq(")p Fo(>)396 3681 y Fv(There)20 b(are)g(some)g +(restrictions)f(for)h(parameter)f(entities:)p Black 396 +4038 a Ft(\225)p Black 60 w Fv(If)h(the)h(internal)e(parameter)g +(entity)g(contains)h(the)g(\002rst)h(tok)o(en)e(of)h(a)h(declaration)e +(\(i.e.)g Fo(<)p Fq(!)p Fv(\),)h(it)h(must)f(also)h(contain)479 +4146 y(the)f(last)i(tok)o(en)d(of)h(the)g(declaration,)e(i.e.)j(the)f +Fo(>)p Fv(.)g(This)g(means)g(that)h(the)f(entity)g(either)g(contains)f +(a)i(whole)e(number)479 4254 y(of)h(complete)f(declarations,)g(or)h +(some)g(te)o(xt)g(from)f(the)h(middle)g(of)g(one)f(declaration.)479 +4404 y Fr(Ille)m(gal:)479 4542 y Fq(">)479 4639 y()j Fv(is)h(contained)e(in)h(the)h(entity)e Fq(e)p +Fv(.)p Black 3797 5278 a Fr(17)p Black eop +%%Page: 18 18 +18 17 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black Black 396 579 a Ft(\225)p Black 60 w Fv(If)f(the)h(internal)e 
+(parameter)g(entity)g(contains)h(a)h(left)f(paranthesis,)f(it)i(must)f +(also)h(contain)e(the)h(corresponding)d(right)479 687 +y(paranthesis.)479 836 y Fr(Ille)m(gal:)479 975 y Fq()479 1072 y()479 1222 y Fv(Because)21 b Fq(\()f Fv(is)h(contained)e(in)h +(the)g(entity)g Fq(e)p Fv(,)h(and)e(the)i(corresponding)16 +b Fq(\))21 b Fv(is)g(contained)e(in)h(the)g(main)g(entity)-5 +b(.)p Black 396 1371 a Ft(\225)p Black 60 w Fv(When)20 +b(reading)e(te)o(xt)i(from)f(an)g(entity)-5 b(,)19 b(the)h(parser)f +(automatically)f(inserts)i(one)g(space)f(character)g(before)f(the)i +(entity)479 1479 y(te)o(xt)g(and)g(one)g(space)g(character)f(after)h +(the)g(entity)g(te)o(xt.)f(Ho)n(we)n(v)o(er)m(,)f(this)j(rule)f(is)h +(not)f(applied)f(within)h(the)g(de\002nition)479 1587 +y(of)g(another)f(entity)-5 b(.)479 1736 y Fr(Le)m(gal:)479 +1875 y Fq()479 1972 +y()479 2121 +y Fv(Because)21 b Fq(\045suffix;)e Fv(is)i(referenced)d(within)i(the)g +(de\002nition)f(te)o(xt)h(for)g Fq(iconfile)p Fv(,)f(no)h(additional)f +(spaces)h(are)479 2229 y(added.)479 2379 y Fr(Ille)m(gal:)479 +2517 y Fq()479 2615 +y()479 2764 y Fv(Because)21 +b Fq(\045suffix;)e Fv(is)i(referenced)d(outside)i(the)g(de\002nition)f +(te)o(xt)h(of)g(another)f(entity)-5 b(,)19 b(the)h(parser)g(replaces) +479 2872 y Fq(\045suffix;)g Fv(by)f Fn(space)p Fq(test)p +Fn(space)p Fv(.)479 3021 y Fr(Ille)m(gal:)479 3160 y +Fq()479 +3257 y()479 3407 y Fv(Because)21 +b(there)e(is)j(a)e(whitespace)g(between)f Fq(\))i Fv(and)e +Fq(*)p Fv(,)i(which)e(is)i(ille)o(gal.)p Black 396 3556 +a Ft(\225)p Black 60 w Fv(An)f(e)o(xternal)f(parameter)g(entity)h(must) +g(al)o(w)o(ays)h(consist)f(of)g(a)h(whole)e(number)g(of)h(complete)f +(declarations.)p Black 396 3664 a Ft(\225)p Black 60 +w Fv(In)h(the)g(internal)g(subset)g(of)g(the)g(DTD,)g(a)h(reference)d +(to)j(a)f(parameter)f(entity)h(\(internal)f(or)h(e)o(xternal\))e(is)k +(only)479 3772 y(allo)n(wed)e(at)h(positions)e(where)h(a)g(ne)n(w)g +(declaration)f(can)h(start.)396 3963 
y(If)g(the)f(parsed)g(entity)g(is) +h(going)e(to)i(be)f(used)g(in)h(the)f(document)e(instance,)i(it)h(is)h +(called)e(a)h Fr(g)o(ener)o(al)e(entity)p Fv(.)h(Such)g(entities)396 +4071 y(can)h(be)g(used)g(as)h(abbre)n(viations)d(for)i(frequent)e +(phrases,)i(or)g(to)g(include)f(e)o(xternal)g(\002les.)i(Internal)e +(general)g(entities)i(are)396 4179 y(declared)e(as)i(follo)n(ws:)396 +4359 y Fo(<)p Fq(!ENTITY)44 b Fn(name)g Fq(")p Fn(value)p +Fq(")p Fo(>)396 4550 y Fv(External)19 b(general)g(entities)i(are)f +(declared)f(this)i(w)o(ay:)396 4730 y Fo(<)p Fq(!ENTITY)44 +b Fn(name)g Fq(SYSTEM)g(")p Fn(file)g(name)p Fq(")p Fo(>)p +Black 3800 5278 a Fr(18)p Black eop +%%Page: 19 19 +19 18 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fv(References)f(to)g(general)f(entities)i(are)f +(written)g(as:)396 759 y Fq(&)p Fn(name)p Fq(;)396 950 +y Fv(The)g(main)g(dif)n(ference)e(between)h(parameter)g(and)h(general)f +(entities)h(is)i(that)e(the)g(former)f(are)h(only)f(recognized)f(in)j +(the)396 1058 y(DTD)g(and)e(that)i(the)f(latter)g(are)g(only)g +(recognized)e(in)i(the)g(document)e(instance.)i(As)h(the)f(DTD)g(is)i +(parsed)d(before)g(the)396 1166 y(document,)f(the)i(parameter)f +(entities)i(are)f(e)o(xpanded)d(\002rst;)k(for)f(e)o(xample)f(it)i(is)g +(possible)f(to)g(use)h(the)f(content)f(of)h(a)396 1274 +y(parameter)f(entity)h(as)h(the)f(name)g(of)f(a)i(general)e(entity:)h +Fq(&\045name;;)2557 1241 y Ff(1)2580 1274 y Fv(.)396 +1423 y(General)g(entities)g(must)h(respect)e(the)i(element)e(hierarchy) +-5 b(.)17 b(This)k(means)f(that)g(there)g(must)g(be)g(an)g(end)g(tag)g +(for)g(e)n(v)o(ery)396 1531 y(start)h(tag)f(in)h(the)f(entity)g(v)n +(alue,)f(and)h(that)g(end)f(tags)i(without)e(corresponding)e(start)k +(tags)f(are)g(not)g(allo)n(wed.)396 1777 y Fj(Example)479 +1924 y Fi(If)f(the)f(author)i(of)f(a)f(document)j(changes)f(sometimes,) +f(it)f(is)g(w)o(orthwhile)h(to)g(set)f(up)i(a)e(general)i(entity)e +(containing)i(the)f(names)479 2021 
y(of)g(the)g(authors.)h(If)e(the)h +(author)h(changes,)g(you)f(need)h(only)g(to)e(change)j(the)e +(de\002nition)g(of)g(the)g(entity)-5 b(,)18 b(and)i(do)f(not)h(need)f +(to)479 2118 y(check)h(all)f(occurrences)h(of)f(authors')h(names:)479 +2289 y Fh()479 +2469 y Fi(In)19 b(the)g(document)i(te)o(xt,)d(you)i(can)f(no)n(w)h +(refer)e(to)h(the)g(author)h(names)f(by)h(writing)e Fh(&authors;)p +Fi(.)479 2607 y Fe(Ille)m(gal:)h Fi(The)g(follo)n(wing)g(tw)o(o)g +(entities)g(are)g(ille)o(gal)f(because)i(the)f(elements)g(in)g(the)g +(de\002nition)g(do)g(not)h(nest)f(properly:)479 2778 +y Fh()q(">)479 2865 y(">)396 3139 y Fv(Earlier)20 b(in)g(this)h(introduction)d(we)i +(e)o(xplained)e(that)j(there)e(are)i(substitutes)f(for)g(reserv)o(ed)e +(characters:)i(<,)g(>,)396 3247 y(&,)f(',)h(and)f +(".)g(These)h(are)g(simply)g(prede\002ned)e(general)h(entities;)i +(note)f(that)g(the)o(y)g(are)g(the)g(only)396 3355 y(prede\002ned)e +(entities.)j(It)f(is)h(allo)n(wed)f(to)g(de\002ne)g(these)g(entities)h +(again)e(as)i(long)e(as)i(the)f(meaning)f(is)i(unchanged.)-2 +3725 y Fp(1.2.6.)35 b(Notations)g(and)e(unpar)n(sed)i(entities)396 +3892 y Fv(Unparsed)19 b(entities)i(ha)n(v)o(e)e(a)i(foreign)d(format)i +(and)f(can)h(thus)g(not)g(be)g(read)g(by)g(the)g(XML)g(parser)-5 +b(.)20 b(Unparsed)f(entities)396 4000 y(are)h(al)o(w)o(ays)h(e)o +(xternal.)e(The)h(format)f(of)h(an)g(unparsed)e(entity)i(must)g(ha)n(v) +o(e)g(been)f(declared,)g(such)h(a)h(format)e(is)i(called)f(a)396 +4108 y Fr(notation)p Fv(.)f(The)g(entity)h(can)g(then)g(be)g(declared)f +(by)h(referring)e(to)i(this)h(notation.)e(As)i(unparsed)d(entities)j +(do)f(not)396 4216 y(contain)f(XML)i(te)o(xt,)e(it)i(is)h(not)d +(possible)h(to)h(include)e(them)h(directly)f(into)h(the)g(document;)e +(you)i(can)g(only)f(declare)396 4324 y(attrib)n(utes)h(such)g(that)h +(names)e(of)h(unparsed)f(entities)h(are)h(acceptable)e(v)n(alues.)396 +4474 y(As)i(you)f(can)g(see,)g(unparsed)f(entities)h(are)g(too)g 
+(complicated)f(in)h(order)f(to)h(ha)n(v)o(e)g(an)o(y)f(purpose.)g(It)h +(is)h(almost)f(al)o(w)o(ays)396 4581 y(better)g(to)g(simply)g(pass)h +(the)f(name)g(of)g(the)g(data)g(\002le)h(as)g(normal)e(attrib)n(ute)g +(v)n(alue,)h(and)f(let)i(the)f(application)f(recognize)396 +4689 y(and)h(process)g(the)g(foreign)e(format.)p Black +3800 5278 a Fr(19)p Black eop +%%Page: 20 20 +20 19 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black -2 597 a Fx(1.3.)39 b(A)g(complete)f(e)n(xample:)g(The)h +Fd(readme)k Fx(DTD)396 777 y Fv(The)20 b(reason)g(for)f +Fr(r)m(eadme)h Fv(w)o(as)h(that)f(I)g(often)g(wrote)g(tw)o(o)g(v)o +(ersions)f(of)h(\002les)h(such)f(as)h(README)g(and)e(INST)-8 +b(ALL)396 885 y(which)20 b(e)o(xplain)f(aspects)h(of)g(a)h(distrib)n +(uted)e(softw)o(are)h(archi)n(v)o(e;)f(one)g(v)o(ersion)g(w)o(as)i +(ASCII-formatted,)d(the)i(other)g(w)o(as)396 993 y(written)g(in)h +(HTML.)e(Maintaining)g(both)g(v)o(ersions)h(means)f(double)g(amount)g +(of)h(w)o(ork,)f(and)h(changes)f(of)h(one)f(v)o(ersion)396 +1101 y(may)h(be)g(for)o(gotten)e(in)i(the)g(other)f(v)o(ersion.)g(T)-7 +b(o)20 b(impro)o(v)o(e)e(this)j(situation)e(I)i(in)m(v)o(ented)d(the)i +Fr(r)m(eadme)g Fv(DTD)g(which)f(allo)n(ws)396 1209 y(me)h(to)h +(maintain)e(only)h(one)f(source)h(written)g(as)g(XML)h(document,)d(and) +h(to)i(generate)e(the)h(ASCII)g(and)g(the)g(HTML)396 +1317 y(v)o(ersion)f(from)g(it.)396 1466 y(In)h(this)h(section,)f(I)g(e) +o(xplain)f(only)g(the)i(DTD.)f(The)f Fr(r)m(eadme)h Fv(DTD)h(is)g +(contained)d(in)j(the)f(PXP)h(distrib)n(ution)e(together)396 +1574 y(with)i(the)f(tw)o(o)g(con)m(v)o(erters)e(to)j(produce)d(ASCII)i +(and)g(HTML.)g(Another)e(section)i(of)g(this)h(manual)e(describes)h +(the)396 1682 y(HTML)g(con)m(v)o(erter)-5 b(.)396 1831 +y(The)20 b(documents)f(ha)n(v)o(e)g(a)i(simple)f(structure:)f(There)h +(are)g(up)g(to)g(three)g(le)n(v)o(els)g(of)g(nested)g(sections,)g +(paragraphs,)d(item)396 1939 y(lists,)22 b(footnotes,)c(hyperlinks,)g 
+(and)h(te)o(xt)h(emphasis.)g(The)g(outermost)f(element)g(has)i(usually) +e(the)h(type)g Fq(readme)p Fv(,)g(it)h(is)396 2047 y(declared)e(by)396 +2228 y Fq()396 2325 +y()396 +2613 y Fv(This)21 b(means)f(that)g(this)h(element)e(contains)h(one)f +(or)h(more)f(sections)i(of)f(the)g(\002rst)h(le)n(v)o(el)f(\(element)f +(type)h Fq(sect1)p Fv(\),)f(and)396 2721 y(that)i(the)f(element)f(has)i +(a)f(required)f(attrib)n(ute)h Fq(title)f Fv(containing)g(character)g +(data)h(\(CD)m(A)-9 b(T)h(A\).)19 b(Note)h(that)h Fq(readme)396 +2829 y Fv(elements)f(must)g(not)g(contain)f(te)o(xt)h(data.)396 +2978 y(The)g(three)g(le)n(v)o(els)g(of)g(sections)g(are)g(declared)f +(as)i(follo)n(ws:)396 3158 y Fq()396 3352 y()396 3547 y()396 3738 y Fv(Ev)o(ery)19 b(section)h(has)g(a)h +Fq(title)f Fv(element)g(as)g(\002rst)h(subelement.)e(After)h(the)g +(title)h(an)f(arbitrary)f(b)n(ut)h(non-empty)396 3846 +y(sequence)f(of)h(inner)g(sections,)g(paragraphs)e(and)h(item)i(lists)g +(follo)n(ws.)f(Note)g(that)g(the)g(inner)g(sections)g(must)g(belong)f +(to)396 3954 y(the)h(ne)o(xt)g(higher)f(section)h(le)n(v)o(el;)g +Fq(sect3)g Fv(elements)f(must)i(not)f(contain)f(inner)g(sections)h +(because)g(there)g(is)h(no)e(ne)o(xt)396 4061 y(higher)g(le)n(v)o(el.) 
+396 4211 y(Ob)o(viously)-5 b(,)18 b(all)j(three)f(declarations)e(allo)n +(w)j(paragraphs)d(\()p Fq(p)p Fv(\))h(and)h(item)g(lists)i(\()p +Fq(ul)p Fv(\).)e(The)f(de\002nition)g(can)h(be)396 4319 +y(simpli\002ed)g(at)h(this)g(point)e(by)h(using)f(a)i(parameter)e +(entity:)396 4499 y Fq()396 +4693 y()p +Black 3800 5278 a Fr(20)p Black eop +%%Page: 21 21 +21 20 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fq()396 773 y()396 964 y Fv(Here,)20 b(the)g(entity)g +Fq(p.like)g Fv(is)h(nothing)e(b)n(ut)h(a)g(macro)g(abbre)n(viating)d +(the)j(same)h(sequence)e(of)h(declarations;)f(if)h(ne)n(w)396 +1072 y(elements)f(on)h(the)f(same)h(le)n(v)o(el)f(as)h +Fq(p)g Fv(and)f Fq(ul)h Fv(are)f(later)h(added,)e(it)i(is)h(suf)n +(\002cient)e(only)f(to)i(change)e(the)i(entity)f(de\002nition.)396 +1180 y(Note)h(that)h(there)e(are)i(some)f(restrictions)f(on)h(the)g +(usage)g(of)g(entities)h(in)f(this)h(conte)o(xt;)e(most)h(important,)e +(entities)396 1288 y(containing)h(a)h(left)h(paranthesis)e(must)h(also) +h(contain)e(the)h(corresponding)d(right)i(paranthesis.)396 +1437 y(Note)h(that)h(the)f(entity)g Fq(p.like)g Fv(is)h(a)f +Fr(par)o(ameter)i Fv(entity)-5 b(,)19 b(i.e.)h(the)g(ENTITY)g +(declaration)e(contains)i(a)g(percent)f(sign,)396 1545 +y(and)h(the)g(entity)g(is)h(referred)e(to)h(by)g Fq(\045p.like;)p +Fv(.)f(This)h(kind)g(of)f(entity)h(must)h(be)f(used)g(to)g(abbre)n +(viate)e(parts)j(of)f(the)396 1653 y(DTD;)g(the)g Fr(g)o(ener)o(al)f +Fv(entities)h(declared)e(without)h(percent)g(sign)h(and)f(referred)f +(to)i(as)g Fq(&name;)f Fv(are)h(not)f(allo)n(wed)g(in)h(this)396 +1761 y(conte)o(xt.)396 1911 y(The)g Fq(title)g Fv(element)g +(speci\002es)g(the)h(title)f(of)g(the)h(section)f(in)g(which)g(it)g +(occurs.)g(The)f(title)i(is)h(gi)n(v)o(en)c(as)j(character)396 +2019 y(data,)f(optionally)f(interspersed)f(with)j(line)f(breaks)g(\()p +Fq(br)p Fv(\):)396 2199 y Fq() +396 2390 y Fv(Compared)19 b(with)h(the)g Fq(title)g Fr(attrib)n(ute)g +Fv(of)g(the)h 
Fq(readme)e Fv(element,)h(this)g(element)g(allo)n(ws)g +(inner)g(markup)e(\(i.e.)i Fq(br)p Fv(\))396 2498 y(while)g(attrib)n +(ute)g(v)n(alues)g(do)g(not:)g(It)g(is)h(an)g(error)e(if)h(an)g(attrib) +n(ute)g(v)n(alue)g(contains)f(the)h(left)h(angle)e(brack)o(et)g +Fm(<)i Fv(literally)396 2605 y(such)f(that)g(it)h(is)h(impossible)d(to) +h(include)g(inner)f(elements.)396 2755 y(The)h(paragraph)e(element)h +Fq(p)i Fv(has)f(a)h(structure)e(similar)i(to)f Fq(title)p +Fv(,)g(b)n(ut)g(it)h(allo)n(ws)f(more)g(inner)f(elements:)396 +2935 y Fq()396 +3129 y()396 3320 +y Fv(Line)20 b(breaks)g(do)f(not)h(ha)n(v)o(e)g(inner)f(structure,)g +(so)i(the)o(y)e(are)h(declared)f(as)i(being)e(empty:)396 +3500 y Fq()396 3691 y Fv(This)21 +b(means)f(that)g(really)g(nothing)e(is)j(allo)n(wed)f(within)g +Fq(br)p Fv(;)g(you)f(must)i(al)o(w)o(ays)f(write)h Fq(

)e +Fv(or)h(abbre)n(viated)396 3799 y Fq(
)p Fv(.)396 +3949 y(Code)g(samples)h(should)e(be)h(mark)o(ed)f(up)h(by)f(the)h +Fq(code)h Fv(tag;)f(emphasized)f(te)o(xt)h(can)g(be)g(indicated)f(by)h +Fq(em)p Fv(:)396 4129 y Fq()396 +4323 y()396 4514 +y Fv(That)20 b Fq(code)g Fv(elements)g(are)g(not)g(allo)n(wed)g(to)g +(contain)f(further)g(markup)f(while)i Fq(em)h Fv(elements)f(do)g(is)h +(a)f(design)g(decision)396 4622 y(by)g(the)g(author)f(of)h(the)g(DTD.) +396 4772 y(Unordered)e(lists)k(simply)d(consists)i(of)f(one)g(or)g +(more)f(list)i(items,)g(and)e(a)i(list)g(item)g(may)e(contain)g +(paragraph-le)n(v)o(el)396 4879 y(material:)p Black 3800 +5278 a Fr(21)p Black eop +%%Page: 22 22 +22 21 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 396 579 a Fq()396 773 +y()396 964 y Fv(F)o(ootnotes)19 +b(are)h(described)f(by)h(the)g(te)o(xt)g(of)g(the)g(note;)g(this)h(te)o +(xt)f(may)g(contain)f(te)o(xt-le)n(v)o(el)g(markup.)f(There)h(is)i(no) +396 1072 y(mechanism)e(to)i(describe)e(the)h(numbering)e(scheme)h(of)h +(footnotes,)f(or)h(to)g(specify)g(ho)n(w)f(footnote)g(references)f(are) +396 1180 y(printed.)396 1360 y Fq()396 1551 y Fv(Hyperlinks)19 +b(are)h(written)g(as)h(in)f(HTML.)g(The)g(anchor)f(tag)h(contains)f +(the)h(te)o(xt)g(describing)f(where)h(the)g(link)g(points)g(to,)396 +1659 y(and)g(the)g Fq(href)g Fv(attrib)n(ute)g(is)h(the)f(pointer)f +(\(as)i(URL\).)f(There)f(is)j(no)d(w)o(ay)i(to)f(describe)f(locations)h +(of)g("hash)g(marks".)f(If)396 1767 y(the)h(link)g(refers)g(to)g +(another)f Fr(r)m(eadme)h Fv(document,)e(the)i(attrib)n(ute)g +Fq(readmeref)f Fv(should)g(be)h(used)g(instead)g(of)g +Fq(href)p Fv(.)396 1875 y(The)g(reason)g(is)h(that)f(the)g(con)m(v)o +(erted)e(document)g(has)i(usually)g(a)h(dif)n(ferent)d(system)i +(identi\002er)g(\(\002le)h(name\),)d(and)i(the)396 1983 +y(link)g(to)h(a)f(con)m(v)o(erted)e(document)g(must)i(be)g(con)m(v)o +(erted,)e(too.)396 2163 y Fq()396 +2260 y()396 +2742 y Fv(Note)20 b(that)h(although)d(it)j(is)g(only)e(sensible)i(to)f 
+(specify)g(one)f(of)h(the)g(tw)o(o)h(attrib)n(utes,)f(the)g(DTD)g(has)h +(no)e(means)h(to)396 2850 y(e)o(xpress)g(this)g(restriction.)396 +3000 y(So)h(f)o(ar)f(the)g(DTD.)g(Finally)-5 b(,)19 b(here)h(is)h(a)g +(document)d(for)i(it:)396 3180 y Fq()396 3277 y()396 3374 y()396 3471 y()486 3569 y +(Usage)486 3666 y(

)576 3763 y(The)g(readme)e +(converter)i(is)g(invoked)g(on)g(the)h(command)e(line)h(by:)486 +3860 y(

)486 3957 y(

)576 4054 y(readme)e([)j(-text)f(|)h +(-html)f(])g(input.xml)486 4151 y(

)486 4248 +y(

)576 4346 y(Here)g(a)g(list)h(of)f(options:)486 +4443 y(

)486 4540 y(
    )576 4637 y(
  • )665 4734 +y(

    -)396 4831 y(text:)f(specifies)g(that)i(ASCII)f +(output)f(should)h(be)h(produced

    )p Black 3800 5278 +a Fr(22)p Black eop +%%Page: 23 23 +23 22 bop Black 3028 67 a Fr(Chapter)19 b(1.)h(What)h(is)g(XML?)p +Black 576 579 a Fq(
  • )576 676 y(
  • )665 773 y(

    -)396 +870 y(html:)43 b(specifies)g(that)i(HTML)f(output)g(should)f(be) +i(produced

    )576 967 y(
  • )486 1065 y(
)486 1162 +y(

)576 1259 y(The)f(input)g(file)g(must)g(be)h(given)f(on)g(the)h +(command)e(line.)h(The)h(converted)e(output)h(is)576 +1356 y(printed)f(to)i(stdout.)486 1453 y(

)396 +1550 y()396 1647 y()486 1745 y(Author)486 +1842 y(

)576 1939 y(The)f(program)g(has)g(been)g(written)g(by)576 +2036 y(Ge) +o(rd)39 b(Stolpmann.)486 2133 y(

)396 2230 y(
)396 +2327 y()-2 2746 y Fx(Notes)p Black 396 2926 +a Fv(1.)p Black 70 w(This)20 b(construct)g(is)h(only)e(allo)n(wed)h +(within)g(the)g(de\002nition)f(of)h(another)e(entity;)i(otherwise)g(e)o +(xtra)f(spaces)i(w)o(ould)529 3034 y(be)f(added)f(\(as)i(e)o(xplained)d +(abo)o(v)o(e\).)g(Such)i(indirection)e(is)j(not)f(recommended.)529 +3172 y Fi(Complete)f(e)o(xample:)529 3343 y Fh()243 b()529 +3430 y()529 +3518 y()529 +3605 y()529 +3785 y Fi(Y)-8 b(ou)19 b(can)h(no)n(w)f(write)f Fh(&text;)j +Fi(in)e(the)g(document)h(instance,)f(and)h(depending)h(on)e(the)g(v)n +(alue)g(of)g Fh(variant)i Fi(either)e Fh(text-a)i Fi(or)529 +3882 y Fh(text-b)g Fi(is)d(inserted.)p Black 3800 5278 +a Fr(23)p Black eop +%%Page: 24 24 +24 23 bop Black Black -2 621 a Fs(Chapter)48 b(2.)f(Using)i(PXP)-2 +1055 y Fx(2.1.)39 b(V)-9 b(alidation)396 1235 y Fv(The)20 +b(parser)g(can)g(be)g(used)g(to)g Fr(validate)f Fv(a)i(document.)d +(This)i(means)g(that)g(all)h(the)f(constraints)g(that)g(must)g(hold)g +(for)f(a)396 1343 y(v)n(alid)h(document)e(are)i(actually)g(check)o(ed.) 
+f(V)-9 b(alidation)19 b(is)i(the)f(def)o(ault)f(mode)h(of)g(PXP,)g +(i.e.)h(e)n(v)o(ery)d(document)h(is)396 1451 y(v)n(alidated)g(while)i +(it)f(is)i(being)d(parsed.)396 1600 y(In)h(the)g Fq(examples)g +Fv(directory)e(of)i(the)g(distrib)n(ution)f(you)h(\002nd)g(the)g +Fq(pxpvalidate)f Fv(application.)f(It)j(is)g(in)m(v)n(ok)o(ed)d(in)j +(the)396 1708 y(follo)n(wing)e(w)o(ay:)396 1888 y Fq(pxpvalidate)43 +b([)i(-wf)f(])h Fn(file)p Fq(...)396 2079 y Fv(The)20 +b(\002les)h(mentioned)e(on)g(the)i(command)d(line)i(are)g(v)n +(alidated,)f(and)h(e)n(v)o(ery)e(w)o(arning)h(and)h(e)n(v)o(ery)f +(error)g(messages)h(are)396 2187 y(printed)f(to)i(stderr)-5 +b(.)396 2337 y(The)20 b(-wf)g(switch)h(modi\002es)e(the)i(beha)n(viour) +d(such)i(that)g(a)h(well-formedness)d(parser)h(is)i(simulated.)f(In)g +(this)g(mode,)f(the)396 2445 y(ELEMENT)-6 b(,)19 b(A)-9 +b(TTLIST)j(,)19 b(and)g(NO)m(T)-8 b(A)f(TION)20 b(declarations)f(of)h +(the)g(DTD)g(are)g(ignored,)e(and)i(only)f(the)i(ENTITY)396 +2553 y(declarations)e(will)i(tak)o(e)f(ef)n(fect.)g(This)g(mode)f(is)i +(intended)e(for)h(documents)e(lacking)h(a)i(DTD.)f(Please)h(note)f +(that)g(the)396 2661 y(parser)g(still)h(scans)g(the)f(DTD)g(fully)g +(and)g(will)h(report)e(all)h(errors)g(in)g(the)g(DTD;)h(such)f(checks)f +(are)h(not)g(required)f(by)g(a)396 2769 y(well-formedness)f(parser)-5 +b(.)396 2918 y(The)20 b Fq(pxpvalidate)f Fv(application)g(is)i(the)f +(simplest)h(sensible)f(program)e(using)i(PXP,)g(you)g(may)f(consider)g +(it)i(as)396 3026 y("hello)f(w)o(orld")f(program.)-2 +3445 y Fx(2.2.)39 b(Ho)n(w)g(to)g(par)n(se)f(a)i(document)d(fr)m(om)i +(an)g(application)396 3624 y Fv(Let)21 b(me)f(\002rst)h(gi)n(v)o(e)e(a) +i(rough)d(o)o(v)o(ervie)n(w)g(of)i(the)h(object)e(model)g(of)h(the)h +(parser)-5 b(.)19 b(The)h(follo)n(wing)f(items)h(are)h(represented)396 +3732 y(by)f(objects:)p Black 396 4055 a Ft(\225)p Black +60 w Fr(Documents:)f Fv(The)h(document)e(representation)g(is)j(more)e 
+(or)h(less)h(the)f(anchor)f(for)g(the)h(application;)f(all)i(accesses)g +(to)479 4163 y(the)f(parsed)g(entities)h(start)f(here.)g(It)g(is)h +(described)e(by)h(the)g(class)h Fq(document)f Fv(contained)e(in)j(the)f +(module)479 4271 y Fq(Pxp_document)p Fv(.)f(Y)-9 b(ou)19 +b(can)h(get)h(some)f(global)f(information,)e(such)j(as)h(the)f(XML)h +(declaration)d(the)j(document)479 4379 y(be)o(gins)f(with,)g(the)g(DTD) +g(of)g(the)g(document,)e(global)i(processing)e(instructions,)h(and)h +(most)g(important,)f(the)479 4487 y(document)f(tree.)p +Black 396 4595 a Ft(\225)p Black 60 w Fr(The)j(contents)e(of)h +(documents:)f Fv(The)h(contents)f(ha)n(v)o(e)h(the)g(structure)f(of)h +(a)h(tree:)f(Elements)g(contain)f(other)g(elements)479 +4703 y(and)h(te)o(xt)744 4670 y Ff(1)768 4703 y Fv(.)h(The)e(common)g +(type)h(to)g(represent)f(both)g(kinds)h(of)g(content)f(is)i +Fq(node)f Fv(which)g(is)h(a)g(class)g(type)e(that)479 +4811 y(uni\002es)h(the)h(properties)d(of)i(elements)g(and)g(character)f +(data.)h(Ev)o(ery)e(node)i(has)g(a)h(list)g(of)f(children)f(\(which)g +(is)i(empty)p Black 3800 5278 a Fr(24)p Black eop +%%Page: 25 25 +25 24 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 479 579 a Fv(if)h(the)f(element)g(is)h(empty)e(or)h(the)g(node)f +(represents)h(te)o(xt\);)f(nodes)h(may)g(ha)n(v)o(e)f(attrib)n(utes;)h +(nodes)g(ha)n(v)o(e)f(al)o(w)o(ays)i(te)o(xt)479 687 +y(contents.)d(There)g(are)g(tw)o(o)h(implementations)e(of)h +Fq(node)p Fv(,)h(the)f(class)i Fq(element_impl)d Fv(for)h(elements,)g +(and)g(the)h(class)479 795 y Fq(data_impl)h Fv(for)f(te)o(xt)h(data.)g +(Y)-9 b(ou)20 b(\002nd)f(these)i(classes)g(and)f(class)h(types)f(in)g +(the)g(module)f Fq(Pxp_document)p Fv(,)g(too.)479 944 +y(Note)h(that)h(attrib)n(ute)f(lists)h(are)f(represented)f(by)g +(non-class)h(v)n(alues.)p Black 396 1094 a Ft(\225)p +Black 60 w Fr(The)h(node)e(e)n(xtension:)g Fv(F)o(or)h(adv)n(anced)e +(usage,)i(e)n(v)o(ery)e(node)i(of)f(the)i(document)d(may)i(ha)n(v)o(e)f 
+(an)h(associated)479 1202 y Fr(e)n(xtension)g Fv(which)g(is)h(simply)f +(a)g(second)f(object.)h(This)g(object)g(must)g(ha)n(v)o(e)g(the)g +(three)g(methods)f Fq(clone)p Fv(,)g Fq(node)p Fv(,)h(and)479 +1310 y Fq(set_node)f Fv(as)h(bare)f(minimum,)e(b)n(ut)j(you)e(are)i +(free)e(to)i(add)f(methods)f(as)i(you)f(w)o(ant.)g(This)g(is)i(the)e +(preferred)e(w)o(ay)j(to)479 1417 y(add)g(functionality)e(to)i(the)h +(document)d(tree)1746 1384 y Ff(2)1770 1417 y Fv(.)j(The)e(class)j +(type)d Fq(extension)h Fv(is)h(de\002ned)e(in)h Fq(Pxp_document)p +Fv(,)f(too.)p Black 396 1525 a Ft(\225)p Black 60 w Fr(The)i(DTD:)f +Fv(Sometimes)g(it)h(is)g(necessary)e(to)i(access)f(the)h(DTD)f(of)g(a)h +(document;)d(the)i(a)n(v)o(erage)f(application)g(does)479 +1633 y(not)h(need)g(this)g(feature.)f(The)h(class)h Fq(dtd)g +Fv(describes)e(DTDs,)i(and)e(mak)o(es)h(it)h(possible)f(to)h(get)f +(representations)e(of)479 1741 y(element,)i(entity)-5 +b(,)19 b(and)h(notation)e(declarations)h(as)i(well)g(as)g(processing)e +(instructions)g(contained)f(in)j(the)f(DTD.)479 1849 +y(This)g(class,)g(and)f Fq(dtd_element)p Fv(,)g Fq(dtd_notation)p +Fv(,)e(and)i Fq(proc_instruction)f Fv(can)h(be)h(found)e(in)i(the)f +(module)479 1957 y Fq(Pxp_dtd)p Fv(.)h(There)f(are)h(a)h(couple)e(of)h +(classes)h(representing)d(dif)n(ferent)h(kinds)g(of)h(entities;)h +(these)f(can)g(be)g(found)f(in)479 2065 y(the)h(module)f +Fq(Pxp_entity)p Fv(.)396 2214 y(Additionally)-5 b(,)18 +b(the)i(follo)n(wing)f(modules)g(play)h(a)g(role:)p Black +396 2447 a Ft(\225)p Black 60 w Fr(Pxp_yacc:)e Fv(Here)i(the)h(main)e +(parsing)h(functions)e(such)i(as)h Fq(parse_document_entity)c +Fv(are)k(located.)e(Some)479 2555 y(additional)g(types)h(and)g +(functions)f(allo)n(w)h(the)g(parser)f(to)i(be)f(con\002gured)e(in)i(a) +h(non-standard)c(w)o(ay)-5 b(.)p Black 396 2663 a Ft(\225)p +Black 60 w Fr(Pxp_types:)19 b Fv(This)h(is)h(a)g(collection)e(of)h +(basic)g(types)g(and)g(e)o(xceptions.)396 2812 y(There)g(are)g(some)g 
+(further)e(modules)i(that)g(are)g(needed)f(internally)g(b)n(ut)h(are)g +(not)g(part)g(of)g(the)g(API.)396 2962 y(Let)h(the)f(document)e(to)i +(be)h(parsed)e(be)h(stored)g(in)g(a)h(\002le)g(called)f +Fq(doc.xml)p Fv(.)f(The)h(parsing)f(process)h(is)h(started)f(by)396 +3070 y(calling)g(the)g(function)396 3250 y Fq(val)45 +b(parse_document_entity)c(:)k(config)e(->)i(source)f(->)g('ext)g(spec)h +(->)f('ext)g(document)396 3441 y Fv(de\002ned)19 b(in)i(the)f(module)f +Fq(Pxp_yacc)p Fv(.)g(The)h(\002rst)h(ar)o(gument)d(speci\002es)i(some)g +(global)g(properties)e(of)i(the)g(parser;)g(it)h(is)396 +3549 y(recommended)c(to)j(start)g(with)g(the)g Fq(default_config)p +Fv(.)e(The)h(second)g(ar)o(gument)e(determines)i(where)g(the)h +(document)396 3657 y(to)h(be)f(parsed)f(comes)h(from;)f(this)i(may)f +(be)g(a)g(\002le,)h(a)g(channel,)d(or)i(an)g(entity)g(ID.)g(T)-7 +b(o)21 b(parse)f Fq(doc.xml)p Fv(,)f(it)i(is)g(suf)n(\002cient)396 +3764 y(to)g(pass)f Fq(from_file)44 b("doc.xml")p Fv(.)396 +3914 y(The)20 b(third)g(ar)o(gument)e(passes)i(the)h(object)e +(speci\002cation)h(to)g(use.)g(Roughly)f(speaking,)g(it)i(determines)e +(which)g(classes)396 4022 y(implement)g(the)h(node)g(objects)f(of)h +(which)g(element)g(types,)f(and)h(which)g(e)o(xtensions)f(are)h(to)g +(be)g(used.)g(The)g Fq('ext)396 4130 y Fv(polymorphic)d(v)n(ariable)i +(is)j(the)e(type)f(of)h(the)h(e)o(xtension.)d(F)o(or)i(the)g(moment,)f +(let)i(us)f(simply)g(pass)h Fq(default_spec)d Fv(as)396 +4238 y(this)j(ar)o(gument,)d(and)h(ignore)g(it.)396 4387 +y(So)i(the)f(follo)n(wing)e(e)o(xpression)h(parses)h +Fq(doc.xml)p Fv(:)396 4567 y Fq(open)44 b(Pxp_yacc)396 +4664 y(let)h(d)f(=)h(parse_document_entity)c(default_config)i +(\(from_file)g("doc.xml"\))g(de-)396 4762 y(fault_spec)p +Black 3800 5278 a Fr(25)p Black eop +%%Page: 26 26 +26 25 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(Note)g(that)h Fq(default_config)d +Fv(implies)i(that)h(w)o(arnings)e(are)h(collected)g(b)n(ut)g(not)g 
+(printed.)e(Errors)h(raise)i(one)f(of)g(the)396 687 y(e)o(xception)f +(de\002ned)g(in)h Fq(Pxp_types)p Fv(;)f(to)i(get)f(readable)f(errors)g +(and)h(w)o(arnings)f(catch)h(the)g(e)o(xceptions)f(as)i(follo)n(ws:)396 +867 y Fq(class)44 b(warner)g(=)486 964 y(object)576 1061 +y(method)f(warn)i(w)f(=)665 1158 y(print_endline)f(\("WARNING:)g(")i(^) +f(w\))486 1256 y(end)396 1353 y(;;)396 1547 y(try)486 +1644 y(let)g(config)g(=)h({)f(default_config)f(with)h(warner)g(=)h(new) +f(warner)g(})g(in)486 1741 y(let)g(d)h(=)g(parse_document_entity)c +(config)j(\(from_file)f("doc.xml"\))g(default_spec)486 +1838 y(in)576 1935 y(...)396 2033 y(with)531 2130 y(e)h(->)620 +2227 y(print_endline)f(\(Pxp_types.string_of_exn)e(e\))396 +2418 y Fv(No)n(w)20 b Fq(d)h Fv(is)g(an)f(object)g(of)g(the)g +Fq(document)f Fv(class.)i(If)f(you)g(w)o(ant)g(the)g(node)f(tree,)h +(you)g(can)g(get)g(the)g(root)f(element)h(by)396 2598 +y Fq(let)45 b(root)f(=)g(d)h(#)g(root)396 2789 y Fv(and)20 +b(if)g(you)g(w)o(ould)f(rather)h(lik)o(e)g(to)g(access)h(the)f(DTD,)g +(determine)f(it)i(by)396 2969 y Fq(let)45 b(dtd)f(=)h(d)f(#)h(dtd)396 +3160 y Fv(As)21 b(it)g(is)g(more)f(interesting,)f(let)h(us)h(in)m(v)o +(estigate)e(the)h(node)f(tree)h(no)n(w)-5 b(.)19 b(Gi)n(v)o(en)g(the)i +(root)e(element,)g(it)i(is)h(possible)d(to)396 3268 y(recursi)n(v)o +(ely)f(tra)n(v)o(erse)h(the)h(whole)f(tree.)g(The)g(children)g(of)g(a)h +(node)f Fq(n)h Fv(are)f(returned)f(by)h(the)h(method)e +Fq(sub_nodes)p Fv(,)g(and)396 3376 y(the)i(type)g(of)g(a)h(node)e(is)i +(returned)d(by)i Fq(node_type)p Fv(.)f(This)i(function)d(tra)n(v)o +(erses)i(the)g(tree,)g(and)g(prints)g(the)g(type)f(of)h(each)396 +3484 y(node:)396 3664 y Fq(let)45 b(rec)f(print_structure)e(n)j(=)486 +3761 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 3858 +y(match)g(ntype)g(with)576 3955 y(T_element)f(name)h(->)665 +4053 y(print_endline)f(\("Element)g(of)i(type)f(")h(^)f(name\);)665 +4150 y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665 +4247 
y(List.iter)h(print_structure)e(children)486 4344 +y(|)j(T_data)e(->)665 4441 y(print_endline)g("Data")486 +4538 y(|)i(_)f(->)665 4635 y(\(*)h(Other)f(node)g(types)g(are)g(not)h +(possible)e(unless)h(the)g(parser)g(is)h(configured)800 +4733 y(differently.)710 4830 y(*\))p Black 3798 5278 +a Fr(26)p Black eop +%%Page: 27 27 +27 26 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 665 579 a Fq(assert)44 b(false)396 770 y Fv(Y)-9 +b(ou)20 b(can)g(call)g(this)h(function)e(by)396 950 y +Fq(print_structure)43 b(root)396 1141 y Fv(The)20 b(type)g(returned)e +(by)i Fq(node_type)f Fv(is)i(either)f Fq(T_element)43 +b(name)21 b Fv(or)e Fq(T_data)p Fv(.)h(The)g Fq(name)g +Fv(of)g(the)g(element)g(type)396 1249 y(is)h(the)g(string)e(included)g +(in)i(the)f(angle)f(brack)o(ets.)h(Note)g(that)g(only)f(elements)h(ha)n +(v)o(e)g(children;)f(data)h(nodes)f(are)h(al)o(w)o(ays)396 +1357 y(lea)n(v)o(es)h(of)e(the)i(tree.)396 1506 y(There)f(are)g(some)g +(more)f(methods)g(in)i(order)e(to)h(access)h(a)f(parsed)g(node)f(tree:) +p Black 396 1739 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(parent)p +Fv(:)19 b(Returns)h(the)h(parent)e(node,)g(or)h(raises)h +Fq(Not_found)e Fv(if)h(the)g(node)g(is)h(already)e(the)h(root)p +Black 396 1847 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(root)p +Fv(:)20 b(Returns)g(the)g(root)g(of)f(the)i(node)e(tree.)p +Black 396 1955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(attribute)e(a)p +Fv(:)21 b(Returns)f(the)g(v)n(alue)f(of)h(the)g(attrib)n(ute)g(with)h +(name)e Fq(a)p Fv(.)i(The)e(method)g(returns)h(a)g(v)n(alue)g(for)479 +2063 y(e)n(v)o(ery)f Fr(declar)m(ed)j Fv(attrib)n(ute,)d(independently) +e(of)j(whether)f(the)i(attrib)n(ute)e(instance)h(is)h(de\002ned)e(or)h +(not.)g(If)g(the)479 2170 y(attrib)n(ute)g(is)h(not)f(declared,)f +Fq(Not_found)g Fv(will)i(be)f(raised.)g(\(In)f(well-formedness)f(mode,) +h(e)n(v)o(ery)g(attrib)n(ute)h(is)479 2278 y(considered)f(as)i(being)e +(implicitly)h(declared)e(with)j(type)f Fq(CDATA)p Fv(.\))479 +2428 
y(The)g(follo)n(wing)f(return)g(v)n(alues)g(are)i(possible:)f +Fq(Value)44 b(s)p Fv(,)20 b Fq(Valuelist)43 b(sl)21 b +Fv(,)f(and)g Fq(Implied_value)p Fv(.)e(The)i(\002rst)479 +2536 y(tw)o(o)h(v)n(alue)e(types)h(indicate)g(that)g(the)g(attrib)n +(ute)g(v)n(alue)g(is)h(a)n(v)n(ailable,)e(either)h(because)g(there)f +(is)i(a)g(de\002nition)479 2644 y Fn(a)p Fq(=")p Fn(value)p +Fq(")f Fv(in)g(the)g(XML)g(te)o(xt,)g(or)g(because)g(there)f(is)i(a)g +(def)o(ault)e(v)n(alue)h(\(declared)f(in)h(the)g(DTD\).)g(Only)g(if)g +(both)479 2752 y(the)g(instance)g(de\002nition)f(and)h(the)g(def)o +(ault)g(declaration)e(are)i(missing,)g(the)h(latter)f(v)n(alue)f +Fq(Implied_value)g Fv(will)479 2860 y(be)h(returned.)479 +3009 y(In)g(the)g(DTD,)h(e)n(v)o(ery)d(attrib)n(ute)i(is)h(typed.)e +(There)h(are)g(single-v)n(alue)e(types)i(\(CD)m(A)-9 +b(T)h(A,)20 b(ID,)g(IDREF)-7 b(,)21 b(ENTITY)-11 b(,)479 +3117 y(NMT)o(OKEN,)19 b(enumerations\),)f(in)i(which)g(case)g(the)h +(method)d(passes)j Fq(Value)44 b(s)21 b Fv(back,)e(where)h +Fq(s)g Fv(is)h(the)479 3225 y(normalized)e(string)g(v)n(alue)h(of)g +(the)g(attrib)n(ute.)g(The)f(other)h(types)g(\(IDREFS,)g(ENTITIES,)f +(NMT)o(OKENS\))479 3333 y(represent)g(list)j(v)n(alues,)d(and)h(the)g +(parser)g(splits)h(the)f(XML)g(literal)h(into)e(se)n(v)o(eral)h(tok)o +(ens)g(and)f(returns)h(these)g(tok)o(ens)479 3441 y(as)h +Fq(Valuelist)44 b(sl)p Fv(.)479 3590 y(Normalization)19 +b(means)h(that)g(entity)g(references)e(\(the)i Fq(&)p +Fn(name)p Fq(;)g Fv(tok)o(ens\))f(and)h(character)f(references)479 +3698 y(\()p Fq(&#)p Fn(number)s Fq(;)p Fv(\))g(are)h(replaced)f(by)g +(the)i(te)o(xt)f(the)o(y)f(represent,)g(and)h(that)g(white)g(space)g +(characters)f(are)i(con)m(v)o(erted)479 3806 y(into)f(plain)g(spaces.)p +Black 396 3955 a Ft(\225)p Black 60 w Fq(n)45 b(#)g(data)p +Fv(:)20 b(Returns)g(the)g(character)f(data)h(contained)f(in)h(the)g +(node.)f(F)o(or)h(data)g(nodes,)f(the)h(meaning)f(is)i(ob)o(vious)479 +4063 
y(as)g(this)g(is)g(the)f(main)g(content)f(of)h(data)g(nodes.)f(F)o +(or)h(element)g(nodes,)f(this)i(method)e(returns)g(the)h(concatenated) +479 4171 y(contents)g(of)g(all)g(inner)g(data)g(nodes.)479 +4321 y(Note)g(that)h(entity)f(references)e(included)h(in)h(the)h(te)o +(xt)f(are)g(resolv)o(ed)f(while)h(the)o(y)f(are)h(being)g(parsed;)f +(for)h(e)o(xample)479 4429 y(the)g(te)o(xt)h("a)f(<>)g(b")g(will) +h(be)f(returned)e(as)j("a)g(<>)f(b")g(by)g(this)h(method.)d(Spaces)j +(of)f(data)g(nodes)f(are)h(al)o(w)o(ays)479 4537 y(preserv)o(ed.)e(Ne)n +(wlines)j(are)f(preserv)o(ed,)e(b)n(ut)i(al)o(w)o(ays)g(con)m(v)o +(erted)e(to)i(\\n)h(characters)e(e)n(v)o(en)g(if)i(ne)n(wlines)e(are)i +(encoded)479 4644 y(as)g(\\r\\n)f(or)g(\\r)-5 b(.)21 +b(Normally)e(you)g(will)i(ne)n(v)o(er)e(see)i(tw)o(o)f(adjacent)f(data) +i(nodes)e(because)h(the)g(parser)f(collapses)h(all)h(data)479 +4752 y(material)f(at)h(one)e(location)h(into)g(one)f(node.)g(\(Ho)n(we) +n(v)o(er)m(,)f(if)i(you)g(create)g(your)f(o)n(wn)g(tree)h(or)g +(transform)f(the)h(parsed)479 4860 y(tree,)g(it)h(is)g(possible)f(to)h +(ha)n(v)o(e)e(adjacent)h(data)g(nodes.\))p Black 3797 +5278 a Fr(27)p Black eop +%%Page: 28 28 +28 27 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 479 579 a Fv(Note)g(that)h(elements)f(that)g(do)g +Fr(not)h Fv(allo)n(w)f(#PCD)m(A)-9 b(T)h(A)20 b(as)h(content)e(will)i +(not)f(ha)n(v)o(e)g(data)g(nodes)f(as)i(children.)e(This)479 +687 y(means)h(that)g(spaces)h(and)f(ne)n(wlines,)f(the)h(only)g +(character)f(material)g(allo)n(wed)h(for)g(such)f(elements,)h(are)g +(silently)479 795 y(dropped.)396 986 y(F)o(or)g(e)o(xample,)e(if)i(the) +f(task)h(is)h(to)f(print)f(all)h(contents)f(of)g(elements)h(with)f +(type)h("v)n(aluable")e(whose)h(attrib)n(ute)g("priority")396 +1094 y(is)i("1",)f(this)h(function)d(can)i(help:)396 +1274 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486 +1371 y(let)f(ntype)g(=)h(n)g(#)f(node_type)g(in)486 1468 +y(match)g(ntype)g(with)576 1565 
y(T_element)f("valuable")g(when)h(n)h +(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665 +1662 y(print_endline)f("Valuable)g(node)h(with)h(priotity)e(1)i +(found:";)665 1759 y(print_endline)e(\(n)h(#)h(data\))486 +1857 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 1954 +y(let)h(children)e(=)i(n)f(#)h(sub_nodes)e(in)665 2051 +y(List.iter)h(print_valuable_prio1)d(children)486 2148 +y(|)k(_)f(->)665 2245 y(assert)g(false)396 2436 y Fv(Y)-9 +b(ou)20 b(can)g(call)g(this)h(function)e(by:)396 2616 +y Fq(print_valuable_prio1)42 b(root)396 2807 y Fv(If)20 +b(you)g(lik)o(e)g(a)h(DSSSL-lik)o(e)f(style,)g(you)g(can)g(mak)o(e)f +(the)h(function)f Fq(process_children)f Fv(e)o(xplicit:)396 +2987 y Fq(let)45 b(rec)f(print_valuable_prio1)d(n)k(=)486 +3182 y(let)f(process_children)e(n)j(=)576 3279 y(let)f(children)f(=)i +(n)g(#)f(sub_nodes)g(in)576 3376 y(List.iter)f(print_valuable_prio1)e +(children)486 3473 y(in)486 3667 y(let)j(ntype)g(=)h(n)g(#)f(node_type) +g(in)486 3764 y(match)g(ntype)g(with)576 3862 y(T_element)f("valuable") +g(when)h(n)h(#)g(attribute)e("priority")g(=)i(Value)f("1")g(->)665 +3959 y(print_endline)f("Valuable)g(node)h(with)h(priority)e(1)i +(found:";)665 4056 y(print_endline)e(\(n)h(#)h(data\))486 +4153 y(|)g(\(T_element)e(_)h(|)h(T_data\))f(->)665 4250 +y(process_children)e(n)486 4347 y(|)j(_)f(->)665 4444 +y(assert)g(false)396 4635 y Fv(So)21 b(f)o(ar)m(,)e(O'Caml)h(is)i(no)n +(w)d(a)i(simple)f("style-sheet)g(language":)e(Y)-9 b(ou)20 +b(can)g(form)f(a)h(big)g("match")g(e)o(xpression)e(to)396 +4743 y(distinguish)h(between)h(all)h(signi\002cant)e(cases,)i(and)f +(pro)o(vide)e(dif)n(ferent)g(reactions)i(on)g(dif)n(ferent)e +(conditions.)h(But)h(this)396 4851 y(technique)f(has)h(limitations;)g +(the)h("match")e(e)o(xpression)g(tends)h(to)g(get)g(lar)o(ger)f(and)h +(lar)o(ger)m(,)e(and)i(it)g(is)i(dif)n(\002cult)d(to)i(store)p +Black 3800 5278 a Fr(28)p Black eop +%%Page: 29 29 +29 28 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a 
Fv(intermediate)f(v)n(alues)h(as)h(there)e(is)j(only)d +(one)h(big)f(recursion.)g(Alternati)n(v)o(ely)-5 b(,)18 +b(it)j(is)g(also)f(possible)g(to)h(represent)e(the)396 +687 y(v)n(arious)g(cases)i(as)g(classes,)g(and)f(to)g(use)h(dynamic)d +(method)h(lookup)g(to)h(\002nd)g(the)g(appropiate)e(class.)j(The)f(ne)o +(xt)f(section)396 795 y(e)o(xplains)g(this)i(technique)e(in)h(detail.) +-2 1213 y Fx(2.3.)39 b(Class-based)e(pr)m(ocessing)g(of)j(the)f(node)f +(tree)396 1393 y Fv(By)21 b(def)o(ault,)e(the)h(parsed)g(node)f(tree)h +(consists)h(of)f(objects)g(of)g(the)g(same)g(class;)h(this)g(is)g(a)g +(good)e(design)g(as)i(long)e(as)i(you)396 1501 y(w)o(ant)g(only)e(to)h +(access)h(selected)f(parts)g(of)g(the)h(document.)c(F)o(or)j(comple)o +(x)f(transformations,)e(it)k(may)f(be)g(better)g(to)g(use)396 +1609 y(dif)n(ferent)f(classes)i(for)f(objects)g(describing)e(dif)n +(ferent)h(element)g(types.)396 1758 y(F)o(or)h(e)o(xample,)f(if)h(the)g +(DTD)h(declares)e(the)i(element)e(types)h Fq(a)p Fv(,)h +Fq(b)p Fv(,)f(and)g Fq(c)p Fv(,)g(and)g(if)g(the)g(task)h(is)g(to)f +(con)m(v)o(ert)e(an)j(arbitrary)396 1866 y(document)d(into)i(a)h +(printable)e(format,)g(the)h(idea)g(is)h(to)f(de\002ne)g(for)g(e)n(v)o +(ery)f(element)g(type)h(a)g(separate)g(class)h(that)g(has)f(a)396 +1974 y(method)f Fq(print)p Fv(.)h(The)g(classes)h(are)f +Fq(eltype_a)p Fv(,)f Fq(eltype_b)p Fv(,)g(and)h Fq(eltype_c)p +Fv(,)f(and)h(e)n(v)o(ery)f(class)i(implements)396 2082 +y Fq(print)f Fv(such)g(that)g(elements)g(of)g(the)g(type)g +(corresponding)d(to)j(the)g(class)i(are)e(con)m(v)o(erted)d(to)k(the)f +(output)f(format.)396 2232 y(The)h(parser)g(supports)f(such)h(a)g +(design)g(directly)-5 b(.)19 b(As)i(it)g(is)g(impossible)e(to)i(deri)n +(v)o(e)d(recursi)n(v)o(e)h(classes)i(in)g(O'Caml)3703 +2198 y Ff(3)3727 2232 y Fv(,)g(the)396 2340 y(specialized)f(element)f +(classes)j(cannot)d(be)h(formed)f(by)g(simply)h(inheriting)f(from)g +(the)h(b)n(uilt-in)g(classes)h(of)f(the)g(parser)396 +2447 
y(and)g(adding)f(methods)g(for)g(customized)g(functionality)-5 +b(.)18 b(T)-7 b(o)20 b(get)g(around)f(this)h(limitation,)g(e)n(v)o(ery) +f(node)g(of)h(the)396 2555 y(document)e(tree)j(is)g(represented)d(by)i +Fr(two)h Fv(objects,)e(one)h(called)g("the)g(node")f(and)h(containing)e +(the)i(recursi)n(v)o(e)396 2663 y(de\002nition)f(of)h(the)g(tree,)g +(one)g(called)g("the)g(e)o(xtension".)e(Ev)o(ery)h(node)g(object)h(has) +g(a)h(reference)e(to)h(the)g(e)o(xtension,)f(and)396 +2771 y(the)h(e)o(xtension)f(has)i(a)f(reference)f(to)h(the)g(node.)f +(The)h(adv)n(antage)e(of)i(this)h(model)e(is)i(that)g(it)g(is)g(no)n(w) +e(possible)h(to)396 2879 y(customize)g(the)g(e)o(xtension)f(without)g +(af)n(fecting)g(the)h(typing)f(constraints)g(of)h(the)h(recursi)n(v)o +(e)d(node)h(de\002nition.)396 3029 y(Ev)o(ery)g(e)o(xtension)g(must)h +(ha)n(v)o(e)g(the)g(three)g(methods)f Fq(clone)p Fv(,)g +Fq(node)p Fv(,)h(and)g Fq(set_node)p Fv(.)f(The)h(method)f +Fq(clone)h Fv(creates)396 3137 y(a)h(deep)e(cop)o(y)h(of)g(the)g(e)o +(xtension)f(object)g(and)h(returns)f(it;)i Fq(node)f +Fv(returns)g(the)g(node)f(object)h(for)f(this)i(e)o(xtension)e(object;) +396 3244 y(and)h Fq(set_node)f Fv(is)i(used)f(to)h(tell)g(the)f(e)o +(xtension)f(object)g(which)h(node)f(is)i(associated)f(with)g(it,)h +(this)g(method)e(is)396 3352 y(automatically)g(called)h(when)g(the)g +(node)f(tree)h(is)h(initialized.)f(The)g(follo)n(wing)e(de\002nition)h +(is)i(a)g(good)e(starting)h(point)396 3460 y(for)g(these)g(methods;)f +(usually)h Fq(clone)g Fv(must)g(be)g(further)f(re\002ned)g(when)h +(instance)g(v)n(ariables)f(are)h(added)f(to)h(the)h(class:)396 +3640 y Fq(class)44 b(custom_extension)e(=)486 3738 y(object)i(\(self\)) +576 3932 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(custom_extension)d +(node)i(option\))576 4126 y(method)f(clone)h(=)h({<)g(>})576 +4223 y(method)e(node)i(=)665 4320 y(match)f(node)g(with)845 +4418 y(None)g(->)934 4515 y(assert)g(false)755 4612 y(|)h(Some)f(n)g +(->)h(n)576 4709 
y(method)e(set_node)h(n)h(=)665 4806 +y(node)f(<-)h(Some)f(n)p Black 3800 5278 a Fr(29)p Black +eop +%%Page: 30 30 +30 29 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 486 676 a Fq(end)396 867 y Fv(This)h(part)e(of)h(the)h(e)o +(xtension)d(is)j(usually)f(the)g(same)h(for)e(all)i(classes,)g(so)g(it) +g(is)g(a)f(good)f(idea)h(to)g(consider)396 975 y Fq(custom_extension)e +Fv(as)j(the)f(super)n(-class)g(of)g(the)h(further)d(class)j +(de\002nitions.)e(Continuining)f(the)j(e)o(xample)d(of)396 +1083 y(abo)o(v)o(e,)h(we)h(can)g(de\002ne)g(the)g(element)g(type)f +(classes)j(as)e(follo)n(ws:)396 1263 y Fq(class)44 b(virtual)g +(custom_extension)e(=)486 1360 y(object)i(\(self\))576 +1457 y(...)g(clone,)g(node,)g(set_node)f(defined)h(as)g(above)g(...)576 +1652 y(method)f(virtual)h(print)g(:)h(out_channel)e(->)h(unit)486 +1749 y(end)396 1943 y(class)g(eltype_a)g(=)486 2040 y(object)g +(\(self\))576 2137 y(inherit)f(custom_extension)576 2234 +y(method)g(print)h(ch)h(=)g(...)486 2332 y(end)396 2526 +y(class)f(eltype_b)g(=)486 2623 y(object)g(\(self\))576 +2720 y(inherit)f(custom_extension)576 2817 y(method)g(print)h(ch)h(=)g +(...)486 2914 y(end)396 3109 y(class)f(eltype_c)g(=)486 +3206 y(object)g(\(self\))576 3303 y(inherit)f(custom_extension)576 +3400 y(method)g(print)h(ch)h(=)g(...)486 3497 y(end)396 +3688 y Fv(The)20 b(method)f Fq(print)h Fv(can)g(no)n(w)f(be)i +(implemented)d(for)h(e)n(v)o(ery)g(element)h(type)g(separately)-5 +b(.)18 b(Note)i(that)h(you)e(get)h(the)396 3796 y(associated)g(node)f +(by)h(in)m(v)n(oking)396 3976 y Fq(self)44 b(#)h(node)396 +4167 y Fv(and)20 b(you)f(get)h(the)h(e)o(xtension)d(object)i(of)g(a)h +(node)e Fq(n)h Fv(by)g(writing)396 4347 y Fq(n)45 b(#)g(extension)396 +4538 y Fv(It)21 b(is)g(guaranteed)d(that)396 4718 y Fq(self)44 +b(#)h(node)f(#)h(extension)e(==)i(self)p Black 3800 5278 +a Fr(30)p Black eop +%%Page: 31 31 +31 30 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(al)o(w)o(ays)h(holds.)396 728 
y(Here)f(are)g(sample) +g(de\002nitions)g(of)g(the)g Fq(print)g Fv(methods:)396 +909 y Fq(class)44 b(eltype_a)g(=)486 1006 y(object)g(\(self\))576 +1103 y(inherit)f(custom_extension)576 1200 y(method)g(print)h(ch)h(=) +665 1297 y(\(*)g(Nodes)f(...)f(are)h(only)g(containers:)f(*\)) +665 1394 y(output_string)g(ch)h("\(";)665 1491 y(List.iter)755 +1588 y(\(fun)g(n)h(->)f(n)h(#)g(extension)e(#)i(print)f(ch\))755 +1686 y(\(self)g(#)h(node)f(#)g(sub_nodes\);)665 1783 +y(output_string)f(ch)h("\)";)486 1880 y(end)396 2074 +y(class)g(eltype_b)g(=)486 2171 y(object)g(\(self\))576 +2268 y(inherit)f(custom_extension)576 2366 y(method)g(print)h(ch)h(=) +665 2463 y(\(*)g(Print)f(the)g(value)g(of)h(the)f(CDATA)g(attribute)f +("print":)h(*\))665 2560 y(match)g(self)g(#)h(node)f(#)h(attribute)e +("print")h(with)755 2657 y(Value)g(s)314 b(->)44 b(output_string)f(ch)h +(s)665 2754 y(|)h(Implied_value)e(->)h(output_string)f(ch)h +("")665 2851 y(|)h(Valuelist)e(l)135 b(->)44 +b(assert)g(false)1517 2948 y(\(*)h(not)f(possible)f(because)h(the)g +(att)h(is)f(CDATA)g(*\))486 3045 y(end)396 3240 y(class)g(eltype_c)g(=) +486 3337 y(object)g(\(self\))576 3434 y(inherit)f(custom_extension)576 +3531 y(method)g(print)h(ch)h(=)665 3628 y(\(*)g(Print)f(the)g(contents) +g(of)g(this)g(element:)g(*\))665 3725 y(output_string)f(ch)h(\(self)g +(#)h(node)f(#)h(data\))486 3823 y(end)396 4017 y(class)f +(null_extension)f(=)486 4114 y(object)h(\(self\))576 +4211 y(inherit)f(custom_extension)576 4308 y(method)g(print)h(ch)h(=)g +(assert)e(false)486 4405 y(end)396 4638 y Fv(The)20 b(remaining)f(task) +h(is)h(to)g(con\002gure)d(the)i(parser)g(such)g(that)g(these)g(e)o +(xtension)f(classes)i(are)f(actually)g(used.)g(Here)396 +4746 y(another)f(problem)f(arises:)j(It)g(is)g(not)f(possible)g(to)g +(dynamically)e(select)j(the)f(class)h(of)f(an)g(object)g(to)g(be)h +(created.)e(As)396 4854 y(w)o(orkaround,)e(PXP)k(allo)n(ws)g(the)f +(user)g(to)g(specify)g Fr(e)n(xemplar)g(objects)g Fv(for)f(the)h(v)n 
+(arious)g(element)f(types;)h(instead)g(of)p Black 3800 +5278 a Fr(31)p Black eop +%%Page: 32 32 +32 31 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(creating)f(the)i(nodes)e(of)h(the)g(tree)g(by)g +(applying)f(the)h Fq(new)g Fv(operator)e(the)j(nodes)e(are)h(produced)e +(by)i(duplicating)e(the)396 687 y(e)o(x)o(emplars.)h(As)h(object)g +(duplication)f(preserv)o(es)g(the)h(class)h(of)f(the)g(object,)f(one)h +(can)g(create)g(fresh)g(objects)g(of)g(e)n(v)o(ery)396 +795 y(class)h(for)f(which)g(pre)n(viously)e(an)i(e)o(x)o(emplar)e(has)j +(been)e(re)o(gistered.)396 944 y(Ex)o(emplars)g(are)h(meant)g(as)h +(objects)f(without)f(contents,)g(the)h(only)g(interesting)f(thing)g(is) +j(that)e(e)o(x)o(emplars)e(are)396 1052 y(instances)i(of)g(a)h(certain) +f(class.)g(The)g(creation)f(of)h(an)h(e)o(x)o(emplar)d(for)h(an)h +(element)g(node)f(can)h(be)g(done)f(by:)396 1232 y Fq(let)45 +b(element_exemplar)d(=)i(new)h(element_impl)e(extension_exemplar)396 +1423 y Fv(And)20 b(a)h(data)f(node)f(e)o(x)o(emplar)f(is)j(created)f +(by:)396 1603 y Fq(let)45 b(data_exemplar)d(=)j(new)f(data_impl)g +(extension_exemplar)396 1794 y Fv(The)20 b(classes)h +Fq(element_impl)e Fv(and)h Fq(data_impl)f Fv(are)h(de\002ned)f(in)i +(the)f(module)f Fq(Pxp_document)p Fv(.)f(The)396 1902 +y(constructors)h(initialize)h(the)g(fresh)g(objects)g(as)h(empty)e +(objects,)h(i.e.)g(without)g(children,)e(without)i(data)g(contents,)f +(and)396 2010 y(so)i(on.)e(The)h Fq(extension_exemplar)e +Fv(is)j(the)f(initial)h(e)o(xtension)e(object)g(the)h(e)o(x)o(emplars)f +(are)h(associated)g(with.)396 2160 y(Once)g(the)g(e)o(x)o(emplars)f +(are)h(created)f(and)h(stored)g(some)n(where)f(\(e.g.)g(in)h(a)h(hash)f +(table\),)f(you)h(can)g(tak)o(e)g(an)g(e)o(x)o(emplar)396 +2268 y(and)g(create)g(a)g(concrete)f(instance)h(\(with)g(contents\))f +(by)h(duplicating)e(it.)j(As)g(user)f(of)g(the)g(parser)g(you)f(are)h +(normally)396 2376 y(not)g(concerned)e(with)i(this)h(as)g(this)g(is)g 
+(part)f(of)g(the)g(internal)f(logic)h(of)g(the)g(parser)m(,)f(b)n(ut)h +(as)h(background)c(kno)n(wledge)h(it)396 2483 y(is)j(w)o(orthwhile)e +(to)i(mention)e(that)h(the)g(tw)o(o)h(methods)e Fq(create_element)f +Fv(and)i Fq(create_data)f Fv(actually)g(perform)396 2591 +y(the)h(duplication)f(of)g(the)i(e)o(x)o(emplar)d(for)h(which)h(the)o +(y)f(are)h(in)m(v)n(ok)o(ed,)e(additionally)g(apply)i(modi\002cations)e +(to)j(the)f(clone,)396 2699 y(and)g(\002nally)g(return)f(the)h(ne)n(w)g +(object.)f(Moreo)o(v)o(er)m(,)f(the)i(e)o(xtension)e(object)i(is)h +(copied,)e(too,)h(and)f(the)i(ne)n(w)f(node)f(object)396 +2807 y(is)i(associated)f(with)g(the)g(fresh)g(e)o(xtension)e(object.)i +(Note)g(that)g(this)g(is)h(the)f(reason)g(why)f(e)n(v)o(ery)g(e)o +(xtension)f(object)i(must)396 2915 y(ha)n(v)o(e)g(a)g +Fq(clone)g Fv(method.)396 3065 y(The)g(con\002guration)e(of)i(the)g +(set)h(of)f(e)o(x)o(emplars)e(is)j(passed)f(to)h(the)f +Fq(parse_document_entity)d Fv(function)i(as)i(third)396 +3173 y(ar)o(gument.)d(In)i(our)f(e)o(xample,)g(this)h(ar)o(gument)e +(can)i(be)g(set)h(up)f(as)h(follo)n(ws:)396 3353 y Fq(let)45 +b(spec)f(=)486 3450 y(make_spec_from_alist)576 3547 y(~data_exemplar:) +535 b(\(new)44 b(data_impl)g(\(new)g(null_extension\)\))576 +3644 y(~default_element_exemplar:)c(\(new)k(element_impl)f(\(new)h +(null_extension\)\))576 3741 y(~element_alist:)710 3838 +y([)h("a",)89 b(new)44 b(element_impl)f(\(new)h(eltype_a\);)800 +3935 y("b",)89 b(new)44 b(element_impl)f(\(new)h(eltype_b\);)800 +4033 y("c",)89 b(new)44 b(element_impl)f(\(new)h(eltype_c\);)710 +4130 y(])576 4227 y(\(\))396 4418 y Fv(The)20 b Fq(~element_alist)f +Fv(function)f(ar)o(gument)g(de\002nes)i(the)g(mapping)e(from)h(element) +h(types)g(to)g(e)o(x)o(emplars)f(as)396 4526 y(associati)n(v)o(e)h +(list.)h(The)f(ar)o(gument)e Fq(~data_exemplar)g Fv(speci\002es)j(the)f +(e)o(x)o(emplar)e(for)i(data)g(nodes,)f(and)h(the)396 +4634 y Fq(~default_element_exemplar)d Fv(is)k(used)f(whene)n(v)o(er)e 
+(the)i(parser)g(\002nds)g(an)g(element)g(type)f(for)h(which)g(the)396 +4742 y(associati)n(v)o(e)g(list)h(does)f(not)g(de\002ne)g(an)g(e)o(x)o +(emplar)-5 b(.)p Black 3800 5278 a Fr(32)p Black eop +%%Page: 33 33 +33 32 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(The)g(con\002guration)e(is)j(no)n(w)e(complete.)g(Y) +-9 b(ou)20 b(can)g(still)h(use)g(the)f(same)g(parsing)f(functions,)g +(only)g(the)h(initialization)g(is)396 687 y(a)h(bit)f(dif)n(ferent.)f +(F)o(or)g(e)o(xample,)g(call)i(the)f(parser)f(by:)396 +867 y Fq(let)45 b(d)f(=)h(parse_document_entity)c(default_config)i +(\(from_file)g("doc.xml"\))g(spec)396 1058 y Fv(Note)20 +b(that)h(the)f(resulting)f(document)f Fq(d)j Fv(has)f(a)h(usable)f +(type;)g(especially)f(the)i Fq(print)f Fv(method)e(we)j(added)e(is)i +(visible.)396 1166 y(So)g(you)e(can)h(print)g(your)e(document)h(by)396 +1346 y Fq(d)45 b(#)g(root)f(#)g(extension)g(#)g(print)g(stdout)396 +1578 y Fv(This)21 b(object-oriented)c(approach)h(looks)i(rather)f +(complicated;)g(this)h(is)i(mostly)d(caused)h(by)g(w)o(orking)e(around) +h(some)396 1686 y(problems)g(of)h(the)g(strict)h(typing)e(system)h(of)g +(O'Caml.)g(Some)g(auxiliary)f(concepts)g(such)h(as)h(e)o(xtensions)e +(were)396 1794 y(needed,)g(b)n(ut)h(the)g(practical)g(consequences)e +(are)i(lo)n(w)-5 b(.)20 b(In)g(the)g(ne)o(xt)f(section,)h(one)g(of)g +(the)g(e)o(xamples)f(of)h(the)396 1902 y(distrib)n(ution)f(is)i(e)o +(xplained,)d(a)j(con)m(v)o(erter)d(from)h Fr(r)m(eadme)h +Fv(documents)e(to)i(HTML.)-2 2321 y Fx(2.4.)39 b(Example:)f(An)h(HTML)f +(bac)m(kend)g(f)m(or)h(the)g Fd(readme)44 b Fx(DTD)396 +2501 y Fv(The)20 b(con)m(v)o(erter)e(from)h Fr(r)m(eadme)h +Fv(documents)e(to)i(HTML)g(documents)f(follo)n(ws)h(strictly)g(the)g +(approach)e(to)j(de\002ne)e(one)396 2609 y(class)i(per)f(element)g +(type.)f(The)h(HTML)g(code)g(is)h(similar)f(to)g(the)h +Fr(r)m(eadme)e Fv(source,)g(because)h(of)g(this)h(most)f(elements)396 +2716 
y(can)g(be)g(con)m(v)o(erted)e(in)i(the)g(follo)n(wing)f(w)o(ay:)h +(Gi)n(v)o(en)g(the)g(input)f(element)396 2897 y Fq(content)396 +3088 y Fv(the)h(con)m(v)o(ersion)e(te)o(xt)i(is)h(the)f(concatenation)e +(of)i(a)h(computed)d(pre\002x,)h(the)h(recursi)n(v)o(ely)f(con)m(v)o +(erted)e(content,)i(and)h(a)396 3195 y(computed)e(suf)n(\002x.)396 +3345 y(Only)i(one)g(element)f(type)h(cannot)f(be)h(handled)f(by)h(this) +g(scheme:)g Fq(footnote)p Fv(.)f(F)o(ootnotes)g(are)h(collected)g +(while)g(the)o(y)396 3453 y(are)g(found)f(in)h(the)g(input)g(te)o(xt,)g +(and)f(the)o(y)h(are)g(printed)f(after)h(the)g(main)g(te)o(xt)g(has)g +(been)g(con)m(v)o(erted)d(and)j(printed.)-2 3781 y Fp(2.4.1.)35 +b(Header)396 4021 y Fq(open)44 b(Pxp_types)396 4118 y(open)g +(Pxp_document)-2 4571 y Fp(2.4.2.)35 b(T)-7 b(ype)34 +b(dec)n(larations)396 4811 y Fq(class)44 b(type)g(footnote_printer)f(=) +p Black 3800 5278 a Fr(33)p Black eop +%%Page: 34 34 +34 33 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 486 579 a Fq(object)576 676 y(method)43 b(footnote_to_html)g(:)h +(store_type)f(-)p Fo(>)i Fq(out_channel)e(-)p Fo(>)h +Fq(unit)486 773 y(end)396 967 y(and)h(store_type)e(=)486 +1065 y(object)576 1162 y(method)g(alloc_footnote)g(:)i +(footnote_printer)d(-)p Fo(>)i Fq(int)576 1259 y(method)f +(print_footnotes)g(:)h(out_channel)f(-)p Fo(>)i Fq(unit)486 +1356 y(end)396 1453 y(;;)-2 1906 y Fp(2.4.3.)35 b(Class)g +Fc(store)396 2073 y Fv(The)20 b Fq(store)g Fv(is)h(a)g(container)d(for) +i(footnotes.)f(Y)-9 b(ou)19 b(can)h(add)g(a)g(footnote)f(by)h(in)m(v)n +(oking)e Fq(alloc_footnote)p Fv(;)g(the)396 2181 y(ar)o(gument)g(is)j +(an)f(object)g(of)g(the)g(class)h Fq(footnote_printer)p +Fv(,)d(the)i(method)f(returns)g(the)i(number)d(of)i(the)g(footnote.)396 +2289 y(The)g(interesting)f(property)f(of)i(a)h(footnote)d(is)k(that)e +(it)h(can)f(be)g(con)m(v)o(erted)d(to)k(HTML,)e(so)i(a)g +Fq(footnote_printer)d Fv(is)396 2397 y(an)i(object)g(with)g(a)h(method) +e Fq(footnote_to_html)p 
Fv(.)f(The)i(class)h Fq(footnote)e +Fv(which)h(is)h(de\002ned)e(belo)n(w)h(has)g(a)396 2505 +y(compatible)f(method)g Fq(footnote_to_html)f Fv(such)i(that)g(objects) +g(created)f(from)h(it)h(can)f(be)g(used)g(as)396 2613 +y Fq(footnote_printer)p Fv(s.)396 2763 y(The)g(other)f(method,)g +Fq(print_footnotes)f Fv(prints)i(the)g(footnotes)f(as)i(de\002nition)e +(list,)i(and)f(is)h(typically)e(in)m(v)n(ok)o(ed)396 +2870 y(after)h(the)g(main)g(material)g(of)g(the)g(page)g(has)g(already) +f(been)h(printed.)e(Ev)o(ery)h(item)h(of)g(the)h(list)g(is)g(printed)e +(by)396 2978 y Fq(footnote_to_html)p Fv(.)396 3200 y +Fq(class)44 b(store)g(=)486 3297 y(object)g(\(self\))576 +3491 y(val)g(mutable)g(footnotes)f(=)i(\()f([])h(:)f(\(int)h(*)f +(footnote_printer\))e(list)i(\))576 3589 y(val)g(mutable)g +(next_footnote_number)d(=)k(1)576 3783 y(method)e(alloc_footnote)g(n)i +(=)665 3880 y(let)g(number)e(=)i(next_footnote_number)d(in)665 +3977 y(next_footnote_number)g Fo(<)p Fq(-)i(number+1;)665 +4074 y(footnotes)g Fo(<)p Fq(-)g(footnotes)f(@)i([)g(number,)e(n)i(];) +665 4171 y(number)576 4366 y(method)e(print_footnotes)g(ch)h(=)665 +4463 y(if)h(footnotes)e Fo(<>)h Fq([])h(then)f(begin)396 +4560 y(output_string)f(ch)h(")p Fo(<)p Fq(hr)g(align=left)g +(noshade=noshade)e(width=\\"30\045\\")p Fo(>)p Fq(\\n";)396 +4657 y(output_string)h(ch)h(")p Fo(<)p Fq(dl)p Fo(>)p +Fq(\\n";)396 4754 y(List.iter)486 4851 y(\(fun)g(\(_,n\))g(-)p +Fo(>)p Black 3800 5278 a Fr(34)p Black eop +%%Page: 35 35 +35 34 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 620 579 a Fq(n)45 b(#)g(footnote_to_html)d(\(self)i(:)h +(#store_type)e(:)p Fo(>)h Fq(store_type\))f(ch\))486 +676 y(footnotes;)396 773 y(output_string)g(ch)h(")p Fo(<)p +Fq(/dl)p Fo(>)p Fq(\\n";)665 870 y(end)486 1065 y(end)396 +1162 y(;;)-2 1614 y Fp(2.4.4.)35 b(Function)f Fc(escape_html)396 +1782 y Fv(This)21 b(function)d(con)m(v)o(erts)h(the)h(characters)f +Fm(<)p Fv(,)h Fm(>)p Fv(,)g(&,)g(and)g(")h(to)f(their)g(HTML)g 
+(representation.)e(F)o(or)h(e)o(xample,)396 1890 y Fq(escape_html)43 +b(")p Fo(<>)p Fq(")h(=)h("<>")p Fv(.)19 b(Other)g(characters)h +(are)g(left)g(unchanged.)396 2070 y Fq(let)45 b(escape_html)e(s)h(=)486 +2167 y(Str.global_substitute)576 2264 y(\(Str.regexp)f(")p +Fo(<)p Fq(\\\\|)p Fo(>)p Fq(\\\\|&\\\\|\\""\))576 2362 +y(\(fun)h(s)g(-)p Fo(>)665 2459 y Fq(match)g(Str.matched_string)e(s)j +(with)755 2556 y(")p Fo(<)p Fq(")f(-)p Fo(>)h Fq("<")665 +2653 y(|)g(")p Fo(>)p Fq(")f(-)p Fo(>)h Fq(">")665 +2750 y(|)g("&")f(-)p Fo(>)h Fq("&")665 2847 y(|)g("\\"")f(-)p +Fo(>)g Fq(""")665 2944 y(|)h(_)g(-)p Fo(>)f Fq(assert)g(false\)) +576 3042 y(s)396 3139 y(;;)-2 3591 y Fp(2.4.5.)35 b(Vir)r(tual)f(c)n +(lass)h Fc(shared)396 3759 y Fv(This)21 b(virtual)e(class)i(is)g(the)g +(abstract)f(superclass)g(of)f(the)i(e)o(xtension)d(classes)k(sho)n(wn)d +(belo)n(w)-5 b(.)19 b(It)i(de\002nes)f(the)g(standard)396 +3867 y(methods)f Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)g +Fq(set_node)p Fv(,)f(and)g(declares)h(the)g(type)g(of)g(the)g(virtual)g +(method)e Fq(to_html)p Fv(.)i(This)396 3975 y(method)f(recursi)n(v)o +(ely)f(tra)n(v)o(erses)i(the)g(whole)g(element)g(tree,)g(and)f(prints)h +(the)g(con)m(v)o(erted)e(HTML)i(code)f(to)i(the)f(output)396 +4083 y(channel)f(passed)h(as)h(second)f(ar)o(gument.)d(The)j(\002rst)h +(ar)o(gument)d(is)j(the)f(reference)f(to)h(the)g(global)f +Fq(store)h Fv(object)g(which)396 4191 y(collects)h(the)f(footnotes.)396 +4371 y Fq(class)44 b(virtual)g(shared)g(=)486 4468 y(object)g(\(self\)) +576 4662 y(\(*)g(--)h(default_ext)e(--)h(*\))576 4857 +y(val)g(mutable)g(node)g(=)g(\(None)g(:)h(shared)f(node)g(option\))p +Black 3800 5278 a Fr(35)p Black eop +%%Page: 36 36 +36 35 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 576 676 a Fq(method)43 b(clone)h(=)h({)p Fo(<)f(>)p +Fq(})576 773 y(method)f(node)i(=)665 870 y(match)f(node)g(with)845 +967 y(None)g(-)p Fo(>)934 1065 y Fq(assert)g(false)755 +1162 y(|)h(Some)f(n)g(-)p Fo(>)h Fq(n)576 1259 
y(method)e(set_node)h(n) +h(=)665 1356 y(node)f Fo(<)p Fq(-)h(Some)f(n)576 1550 +y(\(*)g(--)h(virtual)e(--)i(*\))576 1745 y(method)e(virtual)h(to_html)g +(:)g(store)g(-)p Fo(>)h Fq(out_channel)e(-)p Fo(>)h Fq(unit)486 +1939 y(end)396 2036 y(;;)-2 2489 y Fp(2.4.6.)35 b(Class)g +Fc(only_data)396 2656 y Fv(This)21 b(class)g(de\002nes)f +Fq(to_html)f Fv(such)h(that)h(the)f(character)f(data)h(of)g(the)g +(current)f(node)g(is)i(con)m(v)o(erted)d(to)i(HTML.)g(Note)396 +2764 y(that)h Fq(self)f Fv(is)h(an)f(e)o(xtension)f(object,)g +Fq(self)44 b(#)h(node)20 b Fv(is)h(the)f(node)f(object,)h(and)f +Fq(self)45 b(#)f(node)g(#)h(data)20 b Fv(returns)396 +2872 y(the)g(character)f(data)h(of)g(the)h(node.)396 +3052 y Fq(class)44 b(only_data)g(=)486 3149 y(object)g(\(self\))576 +3247 y(inherit)f(shared)576 3441 y(method)g(to_html)h(store)g(ch)h(=) +665 3538 y(output_string)e(ch)h(\(escape_html)f(\(self)h(#)h(node)f(#)h +(data\)\))486 3635 y(end)396 3732 y(;;)-2 4185 y Fp(2.4.7.)35 +b(Class)g Fc(readme)396 4353 y Fv(This)21 b(class)g(con)m(v)o(erts)d +(elements)i(of)g(type)g Fq(readme)g Fv(to)g(HTML.)g(Such)f(an)h +(element)g(is)h(\(by)f(de\002nition\))e(al)o(w)o(ays)j(the)396 +4461 y(root)f(element)f(of)h(the)g(document.)e(First,)j(the)f(HTML)g +(header)f(is)j(printed;)d(the)h Fq(title)g Fv(attrib)n(ute)f(of)h(the)h +(element)396 4568 y(determines)e(the)i(title)f(of)g(the)h(HTML)f(page.) 
+f(Some)h(aspects)g(of)g(the)g(HTML)g(page)g(can)g(be)g(con\002gured)e +(by)h(setting)396 4676 y(certain)h(parameter)f(entities,)h(for)g(e)o +(xample)e(the)i(background)d(color)m(,)i(the)h(te)o(xt)g(color)m(,)f +(and)h(link)g(colors.)f(After)h(the)396 4784 y(header)m(,)f(the)h +Fq(body)g Fv(tag,)g(and)g(the)g(headline)f(ha)n(v)o(e)g(been)h +(printed,)f(the)h(contents)f(of)h(the)g(page)g(are)g(con)m(v)o(erted)e +(by)p Black 3798 5278 a Fr(36)p Black eop +%%Page: 37 37 +37 36 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(in)m(v)n(oking)e Fq(to_html)i Fv(on)g(all)g +(children)f(of)h(the)g(current)f(node)g(\(which)h(is)h(the)f(root)f +(node\).)g(Then,)g(the)h(footnotes)f(are)396 687 y(appended)f(to)j +(this)f(by)g(telling)g(the)g(global)f Fq(store)h Fv(object)g(to)g +(print)g(the)g(footnotes.)f(Finally)-5 b(,)19 b(the)h(end)g(tags)g(of)g +(the)396 795 y(HTML)g(pages)g(are)g(printed.)396 944 +y(This)h(class)g(is)g(an)f(e)o(xample)f(ho)n(w)g(to)i(access)g(the)f(v) +n(alue)f(of)h(an)g(attrib)n(ute:)g(The)g(v)n(alue)g(is)h(determined)d +(by)i(in)m(v)n(oking)396 1052 y Fq(self)44 b(#)h(node)f(#)h(attribute)e +("title")p Fv(.)20 b(As)h(this)f(attrib)n(ute)g(has)g(been)g(declared)f +(as)i(CD)m(A)-9 b(T)h(A)20 b(and)g(as)h(being)396 1160 +y(required,)d(the)j(v)n(alue)e(has)i(al)o(w)o(ays)f(the)g(form)g +Fq(Value)44 b(s)20 b Fv(where)g Fq(s)g Fv(is)h(the)g(string)e(v)n(alue) +h(of)g(the)g(attrib)n(ute.)396 1310 y(Y)-9 b(ou)20 b(can)g(also)g(see)h +(ho)n(w)f(entity)g(contents)f(can)h(be)g(accessed.)g(A)h(parameter)e +(entity)g(object)h(can)g(be)g(look)o(ed)f(up)h(by)396 +1417 y Fq(self)44 b(#)h(node)f(#)h(dtd)f(#)h(par_entity)e("name")p +Fv(,)20 b(and)f(by)h(in)m(v)n(oking)e Fq(replacement_text)g +Fv(the)i(v)n(alue)g(of)396 1525 y(the)g(entity)g(is)h(returned)e(after) +h(inner)f(parameter)g(and)g(character)g(entities)i(ha)n(v)o(e)f(been)f +(processed.)g(Note)h(that)g(you)396 1633 y(must)g(use)h +Fq(gen_entity)e Fv(instead)h(of)g Fq(par_entity)f 
Fv(to)h(access)h +(general)e(entities.)396 1855 y Fq(class)44 b(readme)g(=)486 +1952 y(object)g(\(self\))576 2049 y(inherit)f(shared)576 +2244 y(method)g(to_html)h(store)g(ch)h(=)665 2341 y(\(*)g(output)f +(header)f(*\))665 2438 y(output_string)396 2535 y(ch)i(")p +Fo(<)p Fq(!DOCTYPE)e(HTML)h(PUBLIC)g(\\"-//W3C//DTD)e(HTML)j(3.2)f +(Final//EN\\")p Fo(>)p Fq(";)665 2632 y(output_string)396 +2729 y(ch)h(")p Fo(<)p Fq(!-)f(WARNING!)f(This)h(is)h(a)g(generated)e +(file,)h(do)g(not)h(edit!)f(-)p Fo(>)p Fq(\\n";)665 2826 +y(let)h(title)f(=)396 2923 y(match)g(self)g(#)h(node)f(#)h(attribute)e +("title")h(with)576 3021 y(Value)g(s)g(-)p Fo(>)h Fq(s)486 +3118 y(|)g(_)f(-)p Fo(>)h Fq(assert)e(false)665 3215 +y(in)665 3312 y(let)i(html_header,)d(_)j(=)396 3409 y(try)g(\(self)f(#) +g(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:header"\))934 +3506 y(#)i(replacement_text)396 3603 y(with)f(WF_error)g(_)h(-)p +Fo(>)f Fq("",)g(false)g(in)665 3701 y(let)h(html_trailer,)d(_)j(=)396 +3798 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e +("readme:html:trailer"\))934 3895 y(#)i(replacement_text)396 +3992 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665 +4089 y(let)h(html_bgcolor,)d(_)j(=)396 4186 y(try)g(\(self)f(#)g(node)g +(#)h(dtd)f(#)h(par_entity)e("readme:html:bgcolor"\))934 +4283 y(#)i(replacement_text)396 4380 y(with)f(WF_error)g(_)h(-)p +Fo(>)f Fq("white",)f(false)h(in)665 4478 y(let)h(html_textcolor,)d(_)j +(=)396 4575 y(try)g(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e +("readme:html:textcolor"\))934 4672 y(#)i(replacement_text)396 +4769 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665 +4866 y(let)h(html_alinkcolor,)d(_)i(=)p Black 3797 5278 +a Fr(37)p Black eop +%%Page: 38 38 +38 37 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fq(try)45 b(\(self)f(#)g(node)g(#)h(dtd)f(#)h +(par_entity)e("readme:html:alinkcolor"\))934 676 y(#)i +(replacement_text)396 773 y(with)f(WF_error)g(_)h(-)p +Fo(>)f Fq("",)g(false)g(in)665 870 
y(let)h(html_vlinkcolor,)d(_)i(=)396 +967 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e +("readme:html:vlinkcolor"\))934 1065 y(#)i(replacement_text)396 +1162 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665 +1259 y(let)h(html_linkcolor,)d(_)j(=)396 1356 y(try)g(\(self)f(#)g +(node)g(#)h(dtd)f(#)h(par_entity)e("readme:html:linkcolor"\))934 +1453 y(#)i(replacement_text)396 1550 y(with)f(WF_error)g(_)h(-)p +Fo(>)f Fq("",)g(false)g(in)665 1647 y(let)h(html_background,)d(_)i(=) +396 1745 y(try)h(\(self)f(#)g(node)g(#)h(dtd)f(#)h(par_entity)e +("readme:html:background"\))934 1842 y(#)i(replacement_text)396 +1939 y(with)f(WF_error)g(_)h(-)p Fo(>)f Fq("",)g(false)g(in)665 +2133 y(output_string)f(ch)h(")p Fo(<)p Fq(html)p Fo(><)p +Fq(header)p Fo(><)p Fq(title)p Fo(>)p Fq(\\n";)665 2230 +y(output_string)f(ch)h(\(escape_html)f(title\);)665 2327 +y(output_string)g(ch)h(")p Fo(<)p Fq(/title)p Fo(><)p +Fq(/header)p Fo(>)p Fq(\\n";)665 2424 y(output_string)f(ch)h(")p +Fo(<)p Fq(body)g(";)665 2522 y(List.iter)396 2619 y(\(fun)g +(\(name,value\))f(-)p Fo(>)531 2716 y Fq(if)h(value)g +Fo(<>)h Fq("")f(then)620 2813 y(output_string)f(ch)i(\(name)f(^)g +("=\\"")g(^)h(escape_html)e(value)h(^)h("\\")f("\)\))396 +2910 y([)h("bgcolor",)178 b(html_bgcolor;)486 3007 y("text",)313 +b(html_textcolor;)486 3104 y("link",)g(html_linkcolor;)486 +3202 y("alink",)268 b(html_alinkcolor;)486 3299 y("vlink",)g +(html_vlinkcolor;)396 3396 y(];)665 3493 y(output_string)43 +b(ch)h(")p Fo(>)p Fq(\\n";)665 3590 y(output_string)f(ch)h +(html_header;)665 3687 y(output_string)f(ch)h(")p Fo(<)p +Fq(h1)p Fo(>)p Fq(";)665 3784 y(output_string)f(ch)h(\(escape_html)f +(title\);)665 3882 y(output_string)g(ch)h(")p Fo(<)p +Fq(/h1)p Fo(>)p Fq(\\n";)665 3979 y(\(*)h(process)e(main)i(content:)e +(*\))665 4076 y(List.iter)396 4173 y(\(fun)h(n)h(-)p +Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396 +4270 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 4367 
+y(\(*)g(now)f(process)g(footnotes)f(*\))665 4464 y(store)h(#)h +(print_footnotes)d(ch;)665 4561 y(\(*)j(trailer)e(*\))665 +4659 y(output_string)g(ch)h(html_trailer;)665 4756 y(output_string)f +(ch)h(")p Fo(<)p Fq(/html)p Fo(>)p Fq(\\n";)p Black 3800 +5278 a Fr(38)p Black eop +%%Page: 39 39 +39 38 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 486 579 a Fq(end)396 676 y(;;)-2 1129 y Fp(2.4.8.)35 +b(Classes)h Fc(section)p Fp(,)31 b Fc(sect1)p Fp(,)g +Fc(sect2)p Fp(,)g(and)j Fc(sect3)396 1296 y Fv(As)21 +b(the)f(con)m(v)o(ersion)e(process)i(is)h(v)o(ery)e(similar)m(,)h(the)g +(con)m(v)o(ersion)d(classes)22 b(of)e(the)g(three)g(section)f(le)n(v)o +(els)i(are)f(deri)n(v)o(ed)396 1404 y(from)f(the)i(more)e(general)g +Fq(section)h Fv(class.)h(The)e(HTML)h(code)g(of)g(the)g(section)g(le)n +(v)o(els)g(only)f(dif)n(fers)h(in)g(the)g(type)g(of)396 +1512 y(the)g(headline,)f(and)h(because)f(of)h(this)h(the)f(classes)i +(describing)c(the)i(section)g(le)n(v)o(els)g(can)g(be)h(computed)d(by)i +(replacing)396 1620 y(the)g(class)i(ar)o(gument)17 b +Fq(the_tag)j Fv(of)g Fq(section)g Fv(by)f(the)i(HTML)e(name)h(of)g(the) +g(headline)f(tag.)396 1770 y(Section)h(elements)g(are)g(con)m(v)o +(erted)e(to)i(HTML)g(by)g(printing)e(a)j(headline)e(and)h(then)f(con)m +(v)o(erting)f(the)i(contents)f(of)h(the)396 1878 y(element)g(recursi)n +(v)o(ely)-5 b(.)18 b(More)h(precisely)-5 b(,)19 b(the)h(\002rst)h +(sub-element)e(is)i(al)o(w)o(ays)f(a)h Fq(title)f Fv(element,)f(and)h +(the)g(other)396 1985 y(elements)g(are)g(the)g(contents)g(of)g(the)g +(section.)g(This)g(structure)f(is)j(declared)c(in)j(the)f(DTD,)g(and)g +(it)h(is)g(guaranteed)d(that)396 2093 y(the)i(document)f(matches)g(the) +i(DTD.)f(Because)g(of)g(this)h(the)f(title)h(node)e(can)h(be)g +(separated)f(from)g(the)h(rest)h(without)f(an)o(y)396 +2201 y(checks.)396 2351 y(Both)g(the)h(title)g(node,)e(and)g(the)h +(body)f(nodes)h(are)g(then)f(con)m(v)o(erted)f(to)i(HTML)g(by)g +(calling)g Fq(to_html)f Fv(on)h(them.)396 2572 
y Fq(class)44 +b(section)g(the_tag)g(=)486 2670 y(object)g(\(self\))576 +2767 y(inherit)f(shared)576 2961 y(val)h(tag)g(=)h(the_tag)576 +3155 y(method)e(to_html)h(store)g(ch)h(=)665 3252 y(let)g(sub_nodes)e +(=)i(self)f(#)g(node)h(#)f(sub_nodes)g(in)665 3350 y(match)g(sub_nodes) +g(with)486 3447 y(title_node)f(::)i(rest)f(-)p Fo(>)576 +3544 y Fq(output_string)e(ch)j(\(")p Fo(<)p Fq(")f(^)g(tag)h(^)f(")p +Fo(>)p Fq(\\n"\);)576 3641 y(title_node)f(#)h(extension)g(#)g(to_html)g +(store)g(ch;)576 3738 y(output_string)e(ch)j(\("\\n)p +Fo(<)p Fq(/")e(^)i(tag)f(^)h(")p Fo(>)p Fq("\);)576 3835 +y(List.iter)665 3932 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#) +i(to_html)e(store)h(ch\))665 4029 y(rest)396 4127 y(|)h(_)g(-)p +Fo(>)576 4224 y Fq(assert)e(false)486 4321 y(end)396 +4418 y(;;)396 4612 y(class)h(sect1)g(=)h(section)f("h1";;)396 +4709 y(class)g(sect2)g(=)h(section)f("h3";;)396 4807 +y(class)g(sect3)g(=)h(section)f("h4";;)p Black 3800 5278 +a Fr(39)p Black eop +%%Page: 40 40 +40 39 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black -2 583 a Fp(2.4.9.)35 b(Classes)h Fc(map_tag)p +Fp(,)31 b Fc(p)p Fp(,)i Fc(em)p Fp(,)f Fc(ul)p Fp(,)h +Fc(li)396 751 y Fv(Se)n(v)o(eral)20 b(element)f(types)h(are)g(con)m(v)o +(erted)e(to)i(HTML)g(by)g(simply)g(mapping)e(them)i(to)g(corresponding) +d(HTML)396 859 y(element)j(types.)g(The)f(class)j Fq(map_tag)d +Fv(implements)g(this,)i(and)f(the)g(class)h(ar)o(gument)d +Fq(the_target_tag)396 967 y Fv(determines)h(the)i(tag)f(name)f(to)i +(map)e(to.)h(The)g(output)f(consists)i(of)f(the)g(start)h(tag,)f(the)g +(recursi)n(v)o(ely)e(con)m(v)o(erted)g(inner)396 1075 +y(elements,)i(and)g(the)g(end)f(tag.)396 1255 y Fq(class)44 +b(map_tag)g(the_target_tag)e(=)486 1352 y(object)i(\(self\))576 +1449 y(inherit)f(shared)576 1643 y(val)h(target_tag)f(=)i +(the_target_tag)576 1838 y(method)e(to_html)h(store)g(ch)h(=)665 +1935 y(output_string)e(ch)h(\(")p Fo(<)p Fq(")g(^)h(target_tag)e(^)i(") +p Fo(>)p Fq(\\n"\);)665 2032 
y(List.iter)396 2129 y(\(fun)f(n)h(-)p +Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396 +2226 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 2323 +y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/")g(^)h(target_tag)e(^)h(")p +Fo(>)p Fq("\);)486 2420 y(end)396 2518 y(;;)396 2712 +y(class)g(p)h(=)g(map_tag)e("p";;)396 2809 y(class)h(em)h(=)f(map_tag)g +("b";;)396 2906 y(class)g(ul)h(=)f(map_tag)g("ul";;)396 +3003 y(class)g(li)h(=)f(map_tag)g("li";;)-2 3456 y Fp(2.4.10.)36 +b(Class)f Fc(br)396 3624 y Fv(Element)20 b(of)g(type)f +Fq(br)i Fv(are)f(mapped)f(to)h(the)g(same)g(HTML)g(type.)g(Note)g(that) +g(HTML)g(forbids)f(the)h(end)g(tag)g(of)g Fq(br)p Fv(.)396 +3804 y Fq(class)44 b(br)h(=)486 3901 y(object)f(\(self\))576 +3998 y(inherit)f(shared)576 4192 y(method)g(to_html)h(store)g(ch)h(=) +665 4289 y(output_string)e(ch)h(")p Fo(<)p Fq(br)p Fo(>)p +Fq(\\n";)665 4387 y(List.iter)396 4484 y(\(fun)g(n)h(-)p +Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e(store)h(ch\))396 +4581 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)486 4678 +y(end)396 4775 y(;;)p Black 3800 5278 a Fr(40)p Black +eop +%%Page: 41 41 +41 40 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black -2 583 a Fp(2.4.11.)36 b(Class)f Fc(code)396 751 +y Fv(The)20 b Fq(code)g Fv(type)g(is)h(con)m(v)o(erted)d(to)i(a)h +Fq(pre)f Fv(section)g(\(preformatted)d(te)o(xt\).)i(As)i(the)g(meaning) +d(of)i(tabs)h(is)g(unspeci\002ed)e(in)396 859 y(HTML,)h(tabs)g(are)h(e) +o(xpanded)c(to)k(spaces.)396 1039 y Fq(class)44 b(code)g(=)486 +1136 y(object)g(\(self\))576 1233 y(inherit)f(shared)576 +1427 y(method)g(to_html)h(store)g(ch)h(=)665 1525 y(let)g(data)f(=)g +(self)h(#)f(node)g(#)h(data)f(in)665 1622 y(\(*)h(convert)e(tabs)i(*\)) +665 1719 y(let)g(l)f(=)h(String.length)e(data)h(in)665 +1816 y(let)h(rec)f(preprocess)f(i)i(column)f(=)396 1913 +y(\(*)h(this)f(is)g(very)h(ineffective)e(but)h(comprehensive:)e(*\))396 +2010 y(if)j(i)f Fo(<)h Fq(l)g(then)486 2107 y(match)f(data.[i])f(with) +665 2205 y('\\t')h(-)p Fo(>)396 2302 
y Fq(let)h(n)f(=)h(8)g(-)f +(\(column)g(mod)g(8\))h(in)396 2399 y(String.make)e(n)i(')g(')f(^)h +(preprocess)e(\(i+1\))h(\(column)g(+)g(n\))576 2496 y(|)g('\\n')g(-)p +Fo(>)396 2593 y Fq("\\n")g(^)h(preprocess)e(\(i+1\))h(0)576 +2690 y(|)g(c)h(-)p Fo(>)396 2787 y Fq(String.make)e(1)i(c)g(^)f +(preprocess)f(\(i+1\))h(\(column)g(+)h(1\))396 2884 y(else)486 +2982 y("")665 3079 y(in)665 3176 y(output_string)e(ch)h(")p +Fo(<)p Fq(p)p Fo(><)p Fq(pre)p Fo(>)p Fq(";)665 3273 +y(output_string)f(ch)h(\(escape_html)f(\(preprocess)g(0)i(0\)\);)665 +3370 y(output_string)e(ch)h(")p Fo(<)p Fq(/pre)p Fo(><)p +Fq(/p)p Fo(>)p Fq(";)486 3564 y(end)396 3662 y(;;)-2 +4114 y Fp(2.4.12.)36 b(Class)f Fc(a)396 4282 y Fv(Hyperlinks,)19 +b(e)o(xpressed)g(by)g(the)i Fq(a)f Fv(element)g(type,)f(are)h(con)m(v)o +(erted)e(to)i(the)g(HTML)g Fq(a)h Fv(type.)e(If)i(the)f(tar)o(get)f(of) +h(the)396 4390 y(hyperlink)e(is)j(gi)n(v)o(en)d(by)i +Fq(href)p Fv(,)g(the)g(URL)g(of)g(this)g(attrib)n(ute)g(can)g(be)g +(used)g(directly)-5 b(.)18 b(Alternati)n(v)o(ely)-5 b(,)18 +b(the)i(tar)o(get)f(can)h(be)396 4498 y(gi)n(v)o(en)f(by)h +Fq(readmeref)f Fv(in)i(which)e(case)i(the)f(".html")g(suf)n(\002x)f +(must)i(be)f(added)f(to)h(the)g(\002le)h(name.)396 4647 +y(Note)f(that)h(within)f Fq(a)g Fv(only)g(#PCD)m(A)-9 +b(T)h(A)20 b(is)h(allo)n(wed,)e(so)i(the)f(contents)f(can)h(be)g(con)m +(v)o(erted)e(directly)h(by)h(applying)396 4755 y Fq(escape_html)f +Fv(to)i(the)f(character)f(data)h(contents.)p Black 3800 +5278 a Fr(41)p Black eop +%%Page: 42 42 +42 41 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fq(class)44 b(a)h(=)486 676 y(object)f(\(self\))576 +773 y(inherit)f(shared)576 967 y(method)g(to_html)h(store)g(ch)h(=)665 +1065 y(output_string)e(ch)h(")p Fo(<)p Fq(a)h(";)665 +1162 y(let)g(href)f(=)396 1259 y(match)g(self)g(#)h(node)f(#)h +(attribute)e("href")h(with)576 1356 y(Value)g(v)g(-)p +Fo(>)h Fq(escape_html)e(v)486 1453 y(|)i(Valuelist)e(_)i(-)p +Fo(>)f Fq(assert)g(false)486 1550 
y(|)h(Implied_value)d(-)p +Fo(>)665 1647 y Fq(begin)i(match)g(self)g(#)h(node)f(#)h(attribute)e +("readmeref")g(with)486 1745 y(Value)h(v)h(-)p Fo(>)f +Fq(escape_html)f(v)i(^)f(".html")396 1842 y(|)h(Valuelist)e(_)i(-)p +Fo(>)f Fq(assert)g(false)396 1939 y(|)h(Implied_value)e(-)p +Fo(>)576 2036 y Fq("")665 2133 y(end)665 2230 y(in)665 +2327 y(if)i(href)f Fo(<>)g Fq("")h(then)396 2424 y(output_string)e(ch)h +(\("href=\\"")88 b(^)45 b(href)f(^)h("\\""\);)665 2522 +y(output_string)e(ch)h(")p Fo(>)p Fq(";)665 2619 y(output_string)f(ch)h +(\(escape_html)f(\(self)h(#)h(node)f(#)h(data\)\);)665 +2716 y(output_string)e(ch)h(")p Fo(<)p Fq(/a)p Fo(>)p +Fq(";)486 2910 y(end)396 3007 y(;;)-2 3460 y Fp(2.4.13.)36 +b(Class)f Fc(footnote)396 3628 y Fv(The)20 b Fq(footnote)g +Fv(class)h(has)f(tw)o(o)h(methods:)e Fq(to_html)g Fv(to)i(con)m(v)o +(ert)d(the)i(footnote)f(reference)f(to)i(HTML,)g(and)396 +3736 y Fq(footnote_to_html)e Fv(to)j(con)m(v)o(ert)d(the)i(footnote)f +(te)o(xt)h(itself.)396 3885 y(The)g(footnote)f(reference)f(is)j(con)m +(v)o(erted)d(to)i(a)h(local)f(hyperlink;)e(more)h(precisely)-5 +b(,)19 b(to)h(tw)o(o)h(anchor)d(tags)j(which)e(are)396 +3993 y(connected)g(with)h(each)g(other)-5 b(.)19 b(The)h(te)o(xt)g +(anchor)f(points)h(to)g(the)g(footnote)f(anchor)m(,)f(and)h(the)i +(footnote)d(anchor)h(points)396 4101 y(to)i(the)f(te)o(xt)g(anchor)-5 +b(.)396 4250 y(The)20 b(footnote)f(must)h(be)g(allocated)f(in)i(the)f +Fq(store)g Fv(object.)f(By)i(allocating)e(the)h(footnote,)f(you)g(get)h +(the)g(number)f(of)396 4358 y(the)h(footnote,)f(and)g(the)i(te)o(xt)f +(of)f(the)i(footnote)d(is)j(stored)f(until)g(the)g(end)g(of)g(the)g +(HTML)g(page)f(is)j(reached)c(when)i(the)396 4466 y(footnotes)f(can)h +(be)g(printed.)f(The)h Fq(to_html)f Fv(method)g(stores)i(simply)e(the)i +(object)e(itself,)i(such)f(that)g(the)396 4574 y Fq(footnote_to_html)e +Fv(method)h(is)i(in)m(v)n(ok)o(ed)e(on)g(the)i(same)f(object)g(that)g +(encountered)d(the)k(footnote.)p Black 3800 5278 a Fr(42)p 
+Black eop +%%Page: 43 43 +43 42 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black 396 579 a Fv(The)g Fq(to_html)g Fv(only)f(allocates)h(the)h +(footnote,)d(and)h(prints)h(the)g(reference)f(anchor)m(,)f(b)n(ut)i(it) +h(does)f(not)g(print)g(nor)396 687 y(con)m(v)o(ert)e(the)j(contents)e +(of)h(the)g(note.)g(This)g(is)h(deferred)d(until)j(the)f(footnotes)e +(actually)i(get)g(printed,)f(i.e.)h(the)g(recursi)n(v)o(e)396 +795 y(call)h(of)f Fq(to_html)f Fv(on)h(the)g(sub)g(nodes)g(is)h(done)e +(by)h Fq(footnote_to_html)p Fv(.)396 944 y(Note)g(that)h(this)f +(technique)f(does)h(not)g(w)o(ork)f(if)i(you)e(mak)o(e)h(another)f +(footnote)f(within)i(a)h(footnote;)d(the)i(second)396 +1052 y(footnote)f(gets)h(allocated)g(b)n(ut)g(not)g(printed.)396 +1274 y Fq(class)44 b(footnote)g(=)486 1371 y(object)g(\(self\))576 +1468 y(inherit)f(shared)576 1662 y(val)h(mutable)g(footnote_number)e(=) +j(0)576 1857 y(method)e(to_html)h(store)g(ch)h(=)665 +1954 y(let)g(number)e(=)396 2051 y(store)h(#)h(alloc_footnote)d(\(self) +i(:)h(#shared)f(:)p Fo(>)g Fq(footnote_printer\))e(in)665 +2148 y(let)j(foot_anchor)e(=)396 2245 y("footnote")g(^)i(string_of_int) +e(number)h(in)665 2342 y(let)h(text_anchor)e(=)396 2439 +y("textnote")g(^)i(string_of_int)e(number)h(in)665 2537 +y(footnote_number)f Fo(<)p Fq(-)h(number;)665 2634 y(output_string)f +(ch)h(\()h(")p Fo(<)p Fq(a)f(name=\\"")g(^)g(text_anchor)f(^)i("\\")f +(href=\\"#")g(^)441 2731 y(foot_anchor)f(^)i("\\")p Fo(>)p +Fq([")e(^)i(string_of_int)e(number)h(^)441 2828 y("])p +Fo(<)p Fq(/a)p Fo(>)p Fq(")g(\))576 3022 y(method)f(footnote_to_html)g +(store)h(ch)g(=)665 3119 y(\(*)h(prerequisite:)d(we)j(are)f(in)h(a)f +(definition)g(list)g Fo(<)p Fq(dl)p Fo(>)p Fq(...)p Fo(<)p +Fq(/dl)p Fo(>)e Fq(*\))665 3217 y(let)j(foot_anchor)e(=)396 +3314 y("footnote")g(^)i(string_of_int)e(footnote_number)f(in)665 +3411 y(let)j(text_anchor)e(=)396 3508 y("textnote")g(^)i(string_of_int) +e(footnote_number)f(in)665 3605 y(output_string)h(ch)h(\(")p 
+Fo(<)p Fq(dt)p Fo(><)p Fq(a)g(name=\\"")f(^)i(foot_anchor)e(^)h("\\")h +(href=\\"#")e(^)396 3702 y(text_anchor)g(^)i("\\")p Fo(>)p +Fq([")f(^)g(string_of_int)f(footnote_number)f(^)396 3799 +y("])p Fo(<)p Fq(/a)p Fo(><)p Fq(/dt)p Fo(>)p Fq(\\n)p +Fo(<)p Fq(dd)p Fo(>)p Fq("\);)665 3896 y(List.iter)396 +3994 y(\(fun)i(n)h(-)p Fo(>)f Fq(n)h(#)g(extension)e(#)i(to_html)e +(store)h(ch\))396 4091 y(\(self)g(#)h(node)f(#)h(sub_nodes\);)665 +4188 y(output_string)e(ch)h(\("\\n)p Fo(<)p Fq(/dd)p +Fo(>)p Fq("\))486 4382 y(end)396 4479 y(;;)p Black 3800 +5278 a Fr(43)p Black eop +%%Page: 44 44 +44 43 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black -2 583 a Fp(2.4.14.)36 b(The)d(speci\002cation)j(of)e(the)f +(document)i(model)396 751 y Fv(This)21 b(code)e(sets)i(up)f(the)g(hash) +g(table)g(that)h(connects)e(element)h(types)g(with)g(the)g(e)o(x)o +(emplars)f(of)h(the)g(e)o(xtension)f(classes)396 859 +y(that)i(con)m(v)o(ert)d(the)i(elements)g(to)g(HTML.)396 +1039 y Fq(open)44 b(Pxp_yacc)396 1233 y(let)h(tag_map)e(=)486 +1330 y(make_spec_from_alist)576 1427 y(~data_exemplar:\(new)e +(data_impl)j(\(new)g(only_data\)\))576 1525 y +(~default_element_exemplar:\(new)39 b(element_impl)k(\(new)h +(no_markup\)\))576 1622 y(~element_alist:)665 1719 y([)h("readme",)e +(\(new)h(element_impl)f(\(new)h(readme\)\);)396 1816 +y("sect1",)89 b(\(new)44 b(element_impl)f(\(new)h(sect1\)\);)396 +1913 y("sect2",)89 b(\(new)44 b(element_impl)f(\(new)h(sect2\)\);)396 +2010 y("sect3",)89 b(\(new)44 b(element_impl)f(\(new)h(sect3\)\);)396 +2107 y("title",)89 b(\(new)44 b(element_impl)f(\(new)h(no_markup\)\);) +396 2205 y("p",)269 b(\(new)44 b(element_impl)f(\(new)h(p\)\);)396 +2302 y("br",)224 b(\(new)44 b(element_impl)f(\(new)h(br\)\);)396 +2399 y("code",)134 b(\(new)44 b(element_impl)f(\(new)h(code\)\);)396 +2496 y("em",)224 b(\(new)44 b(element_impl)f(\(new)h(em\)\);)396 +2593 y("ul",)224 b(\(new)44 b(element_impl)f(\(new)h(ul\)\);)396 +2690 y("li",)224 b(\(new)44 
b(element_impl)f(\(new)h(li\)\);)396 +2787 y("footnote",)f(\(new)h(element_impl)f(\(new)h(footnote)g(:)h +(#shared)e(:)p Fo(>)i Fq(shared\)\);)396 2884 y("a",)269 +b(\(new)44 b(element_impl)f(\(new)h(a\)\);)665 2982 y(])576 +3079 y(\(\))396 3176 y(;;)-2 3678 y Fx(Notes)p Black +396 3857 a Fv(1.)p Black 70 w(Elements)20 b(may)g(also)g(contain)f +(processing)g(instructions.)g(Unlik)o(e)h(other)f(document)g(models,)g +(PXP)i(separates)529 3965 y(processing)e(instructions)g(from)g(the)i +(rest)f(of)g(the)g(te)o(xt)g(and)g(pro)o(vides)e(a)j(second)e(interf)o +(ace)h(to)g(access)h(them)529 4073 y(\(method)e Fq(pinstr)p +Fv(\).)g(Ho)n(we)n(v)o(er)m(,)f(there)h(is)j(a)e(parser)g(option)f(\()p +Fq(enable_pinstr_nodes)p Fv(\))e(which)i(changes)g(the)529 +4181 y(beha)n(viour)f(of)i(the)g(parser)g(such)g(that)g(e)o(xtra)g +(nodes)f(for)h(processing)e(instructions)i(are)g(included)e(into)i(the) +h(tree.)529 4320 y Fi(Furthermore,)e(the)g(tree)g(does)g(normally)h +(not)f(contain)h(nodes)g(for)e(XML)h(comments;)h(the)o(y)f(are)g +(ignored)h(by)f(def)o(ault.)g(Again,)529 4417 y(there)g(is)g(an)g +(option)h(\()p Fh(enable_comment_nodes)p Fi(\))25 b(changing)c(this.)p +Black 396 4566 a Fv(2.)p Black 70 w(Due)f(to)h(the)f(typing)f(system)h +(it)h(is)g(more)e(or)h(less)i(impossible)d(to)i(deri)n(v)o(e)d(recursi) +n(v)o(e)h(classes)i(in)g(O'Caml.)f(T)-7 b(o)20 b(get)529 +4674 y(around)e(this,)j(it)g(is)g(common)d(practice)i(to)g(put)g(the)g +(modi\002able)f(or)h(e)o(xtensible)f(part)h(of)g(recursi)n(v)o(e)f +(objects)h(into)529 4782 y(parallel)g(objects.)p Black +3800 5278 a Fr(44)p Black eop +%%Page: 45 45 +45 44 bop Black 3136 67 a Fr(Chapter)20 b(2.)g(Using)g(PXP)p +Black Black 396 579 a Fv(3.)p Black 70 w(The)g(problem)e(is)k(that)e +(the)g(subclass)h(is)g(usually)e(not)h(a)h(subtype)e(in)h(this)h(case)f +(because)g(O'Caml)g(has)h(a)529 687 y(contra)n(v)n(ariant)d(subtyping)g +(rule.)p Black 3800 5278 a Fr(45)p Black eop +%%Page: 46 46 +46 45 bop Black Black -2 621 a Fs(Chapter)48 
b(3.)f(The)h(objects)g +(representing)g(the)-2 845 y(document)396 1093 y Fr(This)21 +b(description)e(might)h(be)g(out-of-date)o(.)e(See)i(the)g(module)f +(interface)h(\002les)g(for)h(updated)d(information.)-2 +1470 y Fx(3.1.)39 b(The)g Fb(document)44 b Fx(c)m(lass)396 +1722 y Fq(class)g([)h('ext)f(])h(document)e(:)486 1819 +y(Pxp_types.collect_warnings)d(->)486 1916 y(object)576 +2013 y(method)j(init_xml_version)g(:)h(string)g(->)h(unit)576 +2111 y(method)e(init_root)h(:)g('ext)h(node)f(->)g(unit)576 +2305 y(method)f(xml_version)g(:)i(string)576 2402 y(method)e +(xml_standalone)g(:)i(bool)576 2499 y(method)e(dtd)i(:)f(dtd)576 +2596 y(method)f(root)i(:)f('ext)g(node)576 2791 y(method)f(encoding)h +(:)h(Pxp_types.rep_encoding)576 2985 y(method)e(add_pinstr)h(:)g +(proc_instruction)e(->)j(unit)576 3082 y(method)e(pinstr)h(:)h(string)f +(->)g(proc_instruction)e(list)576 3179 y(method)h(pinstr_names)g(:)i +(string)f(list)576 3373 y(method)f(write)h(:)h(Pxp_types.output_stream) +c(->)k(Pxp_types.encoding)c(->)k(unit)486 3568 y(end)396 +3665 y(;;)396 3856 y Fv(The)20 b(methods)f(be)o(ginning)f(with)i +Fq(init_)g Fv(are)g(only)g(for)f(internal)h(use)g(of)g(the)g(parser)-5 +b(.)p Black 396 4088 a Ft(\225)p Black 60 w Fq(xml_version)p +Fv(:)19 b(returns)h(the)g(v)o(ersion)f(string)h(at)g(the)g(be)o +(ginning)e(of)i(the)g(document.)e(F)o(or)i(e)o(xample,)f("1.0")g(is)479 +4196 y(returned)g(if)h(the)g(document)f(be)o(gins)g(with)h +Fo(<)p Fq(?xml)44 b(version="1.0"?)p Fo(>)p Fv(.)p Black +396 4304 a Ft(\225)p Black 60 w Fq(xml_standalone)p Fv(:)19 +b(returns)g(the)h(boolean)f(v)n(alue)g(of)h Fq(standalone)f +Fv(declaration)g(in)h(the)h(XML)f(declaration.)e(If)479 +4412 y(the)i Fq(standalone)g Fv(attrib)n(ute)f(is)i(missing,)f +Fq(false)g Fv(is)h(returned.)p Black 396 4520 a Ft(\225)p +Black 60 w Fq(dtd)p Fv(:)g(returns)e(a)i(reference)d(to)i(the)h(global) +e(DTD)h(object.)p Black 396 4628 a Ft(\225)p Black 60 +w Fq(root)p Fv(:)g(returns)g(a)g(reference)f(to)h(the)g(root)g 
+(element.)p Black 396 4736 a Ft(\225)p Black 60 w Fq(encoding)p +Fv(:)g(returns)f(the)h(internal)g(encoding)e(of)i(the)g(document.)e +(This)i(means)g(that)g(all)h(strings)f(of)g(which)g(the)479 +4844 y(document)e(consists)j(are)f(encoded)f(in)h(this)h(character)e +(set.)p Black 3798 5278 a Fr(46)p Black eop +%%Page: 47 47 +47 46 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p +Black 60 w Fq(pinstr)p Fv(:)g(returns)f(the)i(processing)d +(instructions)i(outside)f(the)h(DTD)h(and)e(outside)h(the)g(root)g +(element.)f(The)479 687 y(ar)o(gument)f(passed)i(to)h(the)f(method)f +(names)g(a)i Fr(tar)m(g)o(et)q Fv(,)g(and)e(the)h(method)f(returns)g +(all)i(instructions)e(with)i(this)g(tar)o(get.)479 795 +y(The)f(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(inside)h +Fo(<)p Fq(?)h Fv(and)e Fq(?)p Fo(>)p Fv(.)p Black 396 +903 a Ft(\225)p Black 60 w Fq(pinstr_names)p Fv(:)g(returns)g(the)i +(names)e(of)h(the)h(processing)d(instructions)p Black +396 1011 a Ft(\225)p Black 60 w Fq(add_pinstr)p Fv(:)h(adds)h(another)f +(processing)g(instruction.)f(This)j(method)e(is)i(used)f(by)f(the)h +(parser)g(itself)h(to)f(enter)g(the)479 1119 y(instructions)f(returned) +g(by)h Fq(pinstr)p Fv(,)f(b)n(ut)h(you)g(can)g(also)g(enter)g +(additional)f(instructions.)p Black 396 1226 a Ft(\225)p +Black 60 w Fq(write)p Fv(:)h(writes)h(the)f(document)e(to)j(the)f +(passed)g(stream)g(as)h(XML)f(te)o(xt)g(using)g(the)g(passed)g(\(e)o +(xternal\))e(encoding.)479 1334 y(The)i(generated)f(te)o(xt)h(is)h(al)o +(w)o(ays)f(v)n(alid)g(XML)g(and)g(can)g(be)g(parsed)g(by)f(PXP;)i(ho)n +(we)n(v)o(er)m(,)d(the)i(te)o(xt)g(is)h(badly)479 1442 +y(formatted)e(\(this)h(is)h(not)f(a)h(pretty)e(printer\).)-2 +1861 y Fx(3.2.)39 b(The)g(c)m(lass)g(type)g Fb(node)396 +2041 y Fv(From)20 b Fq(Pxp_document)p Fv(:)396 2221 y +Fq(type)44 b(node_type)g(=)486 2318 y(T_data)396 2415 +y(|)h(T_element)e(of)i(string)396 2512 y(|)g(T_super_root)396 +2609 
y(|)g(T_pinstr)e(of)i(string)396 2706 y(|)g(T_comment)396 +2804 y Fn(and)g(some)f(other,)g(reserved)f(types)396 +2901 y Fq(;;)396 3095 y(class)h(type)g([)h('ext)f(])h(node)f(=)486 +3192 y(object)g(\('self\))576 3289 y(constraint)f('ext)h(=)h('ext)f +(node)g(#extension)576 3484 y(\(*)g Fn(General)g(observers)f +Fq(*\))576 3678 y(method)g(extension)h(:)g('ext)576 3775 +y(method)f(dtd)i(:)f(dtd)576 3872 y(method)f(parent)h(:)h('ext)f(node) +576 3969 y(method)f(root)i(:)f('ext)g(node)576 4066 y(method)f +(sub_nodes)h(:)g('ext)h(node)f(list)576 4164 y(method)f(iter_nodes)h(:) +g(\('ext)g(node)g(-)p Fo(>)h Fq(unit\))f(-)p Fo(>)g Fq(unit)576 +4261 y(method)f(iter_nodes_sibl)g(:)889 4358 y(\('ext)h(node)h(option)e +(-)p Fo(>)i Fq('ext)f(node)g(-)p Fo(>)g Fq('ext)h(node)f(option)g(-)p +Fo(>)g Fq(unit\))g(-)396 4455 y Fo(>)h Fq(unit)576 4552 +y(method)e(node_type)h(:)g(node_type)576 4649 y(method)f(encoding)h(:)h +(Pxp_types.rep_encoding)576 4746 y(method)e(data)i(:)f(string)576 +4843 y(method)f(position)h(:)h(\(string)e(*)i(int)f(*)h(int\))p +Black 3797 5278 a Fr(47)p Black eop +%%Page: 48 48 +48 47 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 576 579 a Fq(method)43 +b(comment)h(:)h(string)f(option)576 676 y(method)f(pinstr)h(:)h(string) +f(-)p Fo(>)g Fq(proc_instruction)e(list)576 773 y(method)h +(pinstr_names)g(:)i(string)f(list)576 870 y(method)f(write)h(:)h +(Pxp_types.output_stream)c(->)k(Pxp_types.encoding)c(->)k(unit)576 +1065 y(\(*)f Fn(Attribute)f(observers)h Fq(*\))576 1259 +y(method)f(attribute)h(:)g(string)g(-)p Fo(>)h Fq(Pxp_types.att_value) +576 1356 y(method)e(required_string_attribute)e(:)k(string)f(-)p +Fo(>)g Fq(string)576 1453 y(method)f(optional_string_attribute)e(:)k +(string)f(-)p Fo(>)g Fq(string)g(option)576 1550 y(method)f +(required_list_attribute)e(:)k(string)f(-)p Fo(>)g Fq(string)g(list)576 +1647 y(method)f(optional_list_attribute)e(:)k(string)f(-)p +Fo(>)g Fq(string)g(list)576 1745 
y(method)f(attribute_names)g(:)h +(string)g(list)576 1842 y(method)f(attribute_type)g(:)i(string)e(-)p +Fo(>)i Fq(Pxp_types.att_type)576 1939 y(method)e(attributes)h(:)g +(\(string)g(*)h(Pxp_types.att_value\))c(list)576 2036 +y(method)i(id_attribute_name)f(:)j(string)576 2133 y(method)e +(id_attribute_value)f(:)j(string)576 2230 y(method)e +(idref_attribute_names)f(:)i(string)576 2424 y(\(*)g +Fn(Modifying)f(methods)h Fq(*\))576 2619 y(method)f(add_node)h(:)h +(?force:bool)e(-)p Fo(>)h Fq('ext)g(node)g(-)p Fo(>)h +Fq(unit)576 2716 y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p +Fo(>)j Fq(unit)576 2813 y(method)e(delete)h(:)h(unit)576 +2910 y(method)e(set_nodes)h(:)g('ext)h(node)f(list)g(-)p +Fo(>)g Fq(unit)576 3007 y(method)f(quick_set_attributes)f(:)j(\(string) +e(*)i(Pxp_types.att_value\))c(list)j(-)p Fo(>)h Fq(unit)576 +3104 y(method)e(set_comment)g(:)i(string)f(option)g(-)p +Fo(>)g Fq(unit)576 3299 y(\(*)g Fn(Cloning)g(methods)f +Fq(*\))576 3493 y(method)g(orphaned_clone)g(:)i('self)576 +3590 y(method)e(orphaned_flat_clone)f(:)j('self)576 3687 +y(method)e(create_element)g(:)1024 3784 y(?position:\(string)f(*)j(int) +f(*)h(int\))f(-)p Fo(>)1024 3882 y Fq(dtd)g(-)p Fo(>)h +Fq(node_type)e(-)p Fo(>)h Fq(\(string)g(*)h(string\))e(list)h(-)p +Fo(>)1203 3979 y Fq('ext)g(node)576 4076 y(method)f(create_data)g(:)i +(dtd)f(-)p Fo(>)h Fq(string)f(-)p Fo(>)g Fq('ext)g(node)576 +4173 y(method)f(keep_always_whitespace_mode)e(:)j(unit)576 +4367 y(\(*)g Fn(Validating)f(methods)h Fq(*\))576 4561 +y(method)f(local_validate)g(:)i(?use_dfa:bool)d(->)j(unit)f(->)g(unit) +576 4756 y(\(*)g(...)g(Internal)g(methods)g(are)g(undocumented.)f(*\))p +Black 3800 5278 a Fr(48)p Black eop +%%Page: 49 49 +49 48 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 486 579 a Fq(end)396 +676 y(;;)396 867 y Fv(In)g(the)g(module)f Fq(Pxp_types)g +Fv(you)h(can)g(\002nd)g(another)e(type)i(de\002nition)f(that)h(is)i 
+(important)c(in)j(this)f(conte)o(xt:)396 1047 y Fq(type)44 +b(Pxp_types.att_value)e(=)576 1144 y(Value)223 b(of)44 +b(string)486 1241 y(|)h(Valuelist)e(of)h(string)g(list)486 +1339 y(|)h(Implied_value)396 1436 y(;;)-2 1847 y Fp(3.2.1.)35 +b(The)f(structure)f(of)g(document)i(trees)396 2015 y +Fv(A)21 b(node)e(represents)g(either)h(an)g(element)g(or)g(a)g +(character)f(data)h(section.)g(There)g(are)g(tw)o(o)g(classes)h +(implementing)d(the)396 2122 y(tw)o(o)j(aspects)f(of)g(nodes:)g +Fq(element_impl)e Fv(and)i Fq(data_impl)p Fv(.)f(The)h(latter)g(class)h +(does)f(not)g(implement)f(all)i(methods)396 2230 y(because)f(some)g +(methods)f(do)h(not)g(mak)o(e)f(sense)i(for)e(data)h(nodes.)396 +2380 y(\(Note:)g(PXP)h(also)g(supports)e(a)h(mode)g(which)f(forces)h +(that)g(processing)f(instructions)g(and)h(comments)f(are)396 +2488 y(represented)g(as)i(nodes)e(of)h(the)g(document)e(tree.)i(Ho)n +(we)n(v)o(er)m(,)e(these)j(nodes)e(are)h(instances)g(of)g +Fq(element_impl)f Fv(with)396 2596 y(node)g(types)h Fq(T_pinstr)g +Fv(and)f Fq(T_comment)p Fv(,)g(respecti)n(v)o(ely)-5 +b(.)18 b(This)j(mode)e(must)h(be)g(e)o(xplicitly)g(con\002gured;)d(the) +k(basic)396 2704 y(representation)d(kno)n(ws)i(only)f(element)h(and)f +(data)h(nodes.\))396 2853 y(The)g(follo)n(wing)f(\002gure)g(\()p +Fr(A)h(tr)m(ee)h(with)g(element)f(nodes,)f(data)g(nodes,)h(and)f +(attrib)n(utes)p Fv(\))h(sho)n(ws)g(an)g(e)o(xample)f(ho)n(w)h(a)396 +2961 y(tree)g(is)i(constructed)c(from)h(element)h(and)f(data)i(nodes.)e +(The)h(circular)f(areas)h(represent)f(element)h(nodes)f(whereas)h(the) +396 3069 y(o)o(v)n(als)f(denote)f(data)i(nodes.)e(Only)h(elements)g +(may)g(ha)n(v)o(e)g(subnodes;)f(data)h(nodes)g(are)g(al)o(w)o(ays)h +(lea)n(v)o(es)f(of)h(the)f(tree.)g(The)396 3177 y(subnodes)g(of)h(an)g +(element)g(can)g(be)g(either)g(element)f(or)h(data)g(nodes;)g(in)g +(both)f(cases)i(the)g(O'Caml)f(objects)g(storing)f(the)396 +3285 y(nodes)h(ha)n(v)o(e)f(the)i(class)g(type)e Fq(node)p +Fv(.)396 3434 
y(Attrib)n(utes)h(\(the)g(clouds)g(in)g(the)g(picture\))f +(are)h(not)g(directly)g(inte)o(grated)e(into)i(the)g(tree;)h(there)e +(is)i(al)o(w)o(ays)g(an)f(e)o(xtra)g(link)396 3542 y(to)h(the)f(attrib) +n(ute)g(list.)h(This)f(is)h(also)g(true)f(for)f(processing)g +(instructions)g(\(not)h(sho)n(wn)f(in)h(the)h(picture\).)d(This)j +(means)396 3650 y(that)g(there)e(are)h(separated)g(access)g(methods)g +(for)f(attrib)n(utes)h(and)g(processing)f(instructions.)p +Black 3800 5278 a Fr(49)p Black eop +%%Page: 50 50 +50 49 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-1.)f(A)i +(tr)o(ee)e(with)i(element)f(nodes,)h(data)e(nodes,)i(and)f(attrib)n +(utes)396 2578 y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 2578 a @beginspecial 0 @llx 0 @lly +329 @urx 218 @ury 3290 @rwi @setspecial +%%BeginDocument: pic/node_term.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/node_term.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 329 218 +%%Pages: 0 +%%BeginSetup +%%EndSetup +%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} 
bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-1.0 251.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def +/reencdict 12 dict def /ReEncode { reencdict begin +/newcodesandnames exch def /newfontname exch def /basefontname exch def +/basefontdict basefontname findfont def /newfont basefontdict maxlength dict def +basefontdict { exch dup /FID ne { dup /Encoding eq +{ exch dup length array copy newfont 3 1 roll put } +{ exch newfont 3 1 roll put } ifelse } { pop pop } ifelse } forall +newfont /FontName 
newfontname put newcodesandnames aload pop +128 1 255 { newfont /Encoding get exch /.notdef put } for +newcodesandnames length 2 idiv { newfont /Encoding get 3 1 roll put } repeat +newfontname newfont definefont pop end } def +/isovec [ +8#200 /grave 8#201 /acute 8#202 /circumflex 8#203 /tilde +8#204 /macron 8#205 /breve 8#206 /dotaccent 8#207 /dieresis +8#210 /ring 8#211 /cedilla 8#212 /hungarumlaut 8#213 /ogonek 8#214 /caron +8#220 /dotlessi 8#230 /oe 8#231 /OE +8#240 /space 8#241 /exclamdown 8#242 /cent 8#243 /sterling +8#244 /currency 8#245 /yen 8#246 /brokenbar 8#247 /section 8#250 /dieresis +8#251 /copyright 8#252 /ordfeminine 8#253 /guillemotleft 8#254 /logicalnot +8#255 /endash 8#256 /registered 8#257 /macron 8#260 /degree 8#261 /plusminus +8#262 /twosuperior 8#263 /threesuperior 8#264 /acute 8#265 /mu 8#266 /paragraph +8#267 /periodcentered 8#270 /cedilla 8#271 /onesuperior 8#272 /ordmasculine +8#273 /guillemotright 8#274 /onequarter 8#275 /onehalf +8#276 /threequarters 8#277 /questiondown 8#300 /Agrave 8#301 /Aacute +8#302 /Acircumflex 8#303 /Atilde 8#304 /Adieresis 8#305 /Aring +8#306 /AE 8#307 /Ccedilla 8#310 /Egrave 8#311 /Eacute +8#312 /Ecircumflex 8#313 /Edieresis 8#314 /Igrave 8#315 /Iacute +8#316 /Icircumflex 8#317 /Idieresis 8#320 /Eth 8#321 /Ntilde 8#322 /Ograve +8#323 /Oacute 8#324 /Ocircumflex 8#325 /Otilde 8#326 /Odieresis 8#327 /multiply +8#330 /Oslash 8#331 /Ugrave 8#332 /Uacute 8#333 /Ucircumflex +8#334 /Udieresis 8#335 /Yacute 8#336 /Thorn 8#337 /germandbls 8#340 /agrave +8#341 /aacute 8#342 /acircumflex 8#343 /atilde 8#344 /adieresis 8#345 /aring +8#346 /ae 8#347 /ccedilla 8#350 /egrave 8#351 /eacute +8#352 /ecircumflex 8#353 /edieresis 8#354 /igrave 8#355 /iacute +8#356 /icircumflex 8#357 /idieresis 8#360 /eth 8#361 /ntilde 8#362 /ograve +8#363 /oacute 8#364 /ocircumflex 8#365 /otilde 8#366 /odieresis 8#367 /divide +8#370 /oslash 8#371 /ugrave 8#372 /uacute 8#373 /ucircumflex +8#374 /udieresis 8#375 /yacute 8#376 /thorn 8#377 /ydieresis] 
def +/Helvetica-Bold /Helvetica-Bold-iso isovec ReEncode +/Helvetica /Helvetica-iso isovec ReEncode +/Helvetica-Oblique /Helvetica-Oblique-iso isovec ReEncode + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 5962 m -1000 -1000 l 7537 -1000 l 7537 5962 l cp clip + 0.05039 0.05039 sc +% Polyline +7.500 slw +n 1770 2700 m 1665 2700 1665 3045 105 arcto 4 {pop} repeat + 1665 3150 2730 3150 105 arcto 4 {pop} repeat + 2835 3150 2835 2805 105 arcto 4 {pop} repeat + 2835 2700 1770 2700 105 arcto 4 {pop} repeat + cp gs col7 0.75 shd ef gr gs col0 s gr +% Ellipse +n 2250 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1575 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2925 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 900 2925 242 242 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Polyline +n 420 3825 m 315 3825 315 4170 105 arcto 4 {pop} repeat + 315 4275 1380 4275 105 arcto 4 {pop} repeat + 1485 4275 1485 3930 105 arcto 4 {pop} repeat + 1485 3825 420 3825 105 arcto 4 {pop} repeat + cp gs col7 0.75 shd ef gr gs col0 s gr +% Polyline +n 2085 1275 m 1582 1807 l gs col0 s gr +% Polyline +n 2407 1297 m 2940 1800 l gs col0 s gr +% Polyline +n 1417 2190 m 900 2692 l gs col0 s gr +% Polyline +n 1740 2190 m 2257 2700 l gs col0 s gr +% Polyline +n 892 3180 m 892 3825 l gs col0 s gr +% Polyline +n 45 675 m 6525 675 l 6525 4950 l 45 4950 l cp gs col0 s gr +% Polyline +n 2250 3600 m 2263 3597 l 2277 3594 l 2293 3592 l 2309 3589 l 2326 3586 l + 2344 3583 l 2362 3580 l 2381 3578 l 2399 3575 l 2418 3572 l 
+ 2436 3569 l 2454 3566 l 2471 3563 l 2488 3561 l 2504 3558 l + 2520 3555 l 2537 3552 l 2555 3548 l 2571 3545 l 2588 3541 l + 2604 3537 l 2621 3533 l 2637 3528 l 2653 3524 l 2669 3520 l + 2684 3517 l 2700 3514 l 2715 3512 l 2730 3510 l 2745 3510 l + 2762 3511 l 2777 3512 l 2793 3514 l 2807 3517 l 2821 3520 l + 2835 3524 l 2849 3528 l 2863 3532 l 2877 3537 l 2893 3542 l + 2908 3548 l 2925 3555 l 2938 3561 l 2951 3568 l 2965 3575 l + 2978 3584 l 2992 3593 l 3007 3602 l 3021 3612 l 3035 3623 l + 3050 3633 l 3064 3643 l 3079 3652 l 3093 3661 l 3108 3670 l + 3122 3677 l 3136 3684 l 3150 3690 l 3166 3696 l 3182 3701 l + 3198 3706 l 3214 3710 l 3230 3713 l 3246 3716 l 3263 3719 l + 3279 3721 l 3295 3724 l 3311 3726 l 3327 3729 l 3343 3731 l + 3359 3733 l 3375 3735 l 3391 3736 l 3407 3737 l 3423 3738 l + 3439 3738 l 3455 3738 l 3471 3738 l 3488 3737 l 3504 3737 l + 3520 3736 l 3536 3736 l 3552 3735 l 3568 3735 l 3584 3735 l + 3600 3735 l 3616 3735 l 3632 3735 l 3648 3734 l 3663 3734 l + 3678 3733 l 3693 3732 l 3708 3731 l 3723 3730 l 3739 3729 l + 3755 3729 l 3771 3729 l 3788 3730 l 3806 3732 l 3825 3735 l + 3840 3738 l 3856 3741 l 3874 3745 l 3892 3749 l 3911 3753 l + 3931 3757 l 3951 3762 l 3972 3767 l 3993 3772 l 4014 3777 l + 4034 3782 l 4054 3787 l 4072 3793 l 4089 3799 l 4105 3805 l + 4119 3811 l 4130 3818 l 4140 3825 l 4150 3835 l 4157 3846 l + 4161 3858 l 4163 3870 l 4164 3883 l 4163 3897 l 4161 3911 l + 4159 3925 l 4156 3939 l 4154 3952 l 4151 3966 l 4148 3979 l + 4144 3992 l 4140 4005 l 4135 4018 l 4128 4031 l 4121 4045 l + 4112 4058 l 4104 4073 l 4095 4087 l 4085 4101 l 4075 4116 l + 4065 4129 l 4055 4143 l 4043 4155 l 4032 4166 l 4019 4176 l + 4005 4185 l 3992 4192 l 3978 4197 l 3963 4202 l 3947 4206 l + 3930 4210 l 3913 4213 l 3896 4216 l 3878 4218 l 3861 4220 l + 3843 4222 l 3825 4224 l 3807 4226 l 3789 4228 l 3771 4229 l + 3753 4230 l 3735 4230 l 3717 4230 l 3698 4228 l 3678 4226 l + 3659 4224 l 3639 4220 l 3619 4216 l 3598 4212 l 3578 4208 l + 3557 4203 l 
3536 4199 l 3516 4195 l 3496 4191 l 3477 4189 l + 3457 4187 l 3438 4185 l 3420 4185 l 3402 4185 l 3384 4186 l + 3367 4188 l 3350 4190 l 3333 4193 l 3317 4196 l 3301 4200 l + 3285 4203 l 3269 4207 l 3253 4211 l 3237 4214 l 3220 4218 l + 3203 4221 l 3186 4224 l 3168 4227 l 3150 4230 l 3132 4233 l + 3113 4236 l 3094 4239 l 3074 4242 l 3055 4246 l 3035 4249 l + 3015 4253 l 2995 4257 l 2974 4260 l 2954 4264 l 2934 4267 l + 2914 4270 l 2894 4272 l 2874 4274 l 2855 4275 l 2835 4275 l + 2815 4275 l 2795 4274 l 2775 4272 l 2755 4270 l 2734 4268 l + 2713 4265 l 2692 4262 l 2671 4259 l 2650 4256 l 2630 4252 l + 2609 4249 l 2590 4245 l 2571 4242 l 2553 4238 l 2536 4234 l + 2520 4230 l 2503 4225 l 2487 4219 l 2473 4213 l 2460 4207 l + 2448 4200 l 2437 4192 l 2426 4185 l 2415 4178 l 2404 4170 l + 2393 4163 l 2380 4157 l 2368 4151 l 2354 4145 l 2340 4140 l + 2325 4135 l 2310 4131 l 2294 4128 l 2277 4125 l 2260 4122 l + 2243 4120 l 2225 4118 l 2208 4115 l 2191 4113 l 2174 4110 l + 2158 4107 l 2143 4104 l 2128 4100 l 2115 4095 l 2101 4089 l + 2087 4083 l 2074 4076 l 2061 4070 l 2049 4063 l 2037 4056 l + 2025 4049 l 2014 4042 l 2004 4034 l 1995 4025 l 1987 4016 l + 1980 4005 l 1975 3993 l 1972 3980 l 1971 3965 l 1970 3949 l + 1971 3932 l 1972 3915 l 1973 3898 l 1974 3881 l 1976 3865 l + 1977 3850 l 1978 3837 l 1980 3825 l 1983 3812 l 1986 3801 l + 1990 3792 l 1994 3784 l 1998 3776 l 2003 3768 l 2008 3761 l + 2013 3752 l 2019 3744 l 2025 3735 l 2032 3726 l 2040 3717 l + 2048 3707 l 2057 3698 l 2066 3688 l 2075 3678 l 2084 3669 l + 2094 3660 l 2104 3652 l 2115 3645 l 2127 3639 l 2138 3633 l + 2150 3628 l 2162 3624 l 2174 3620 l 2186 3617 l 2200 3613 l + 2214 3609 l 2231 3604 l cp gs col0 s gr +% Polyline +n 3645 1080 m 3660 1077 l 3677 1074 l 3694 1071 l 3713 1068 l 3733 1065 l + 3754 1063 l 3775 1060 l 3798 1058 l 3820 1056 l 3843 1053 l + 3866 1051 l 3889 1049 l 3912 1047 l 3934 1045 l 3955 1043 l + 3976 1041 l 3996 1039 l 4015 1038 l 4033 1036 l 4050 1035 l + 4071 1034 l 4090 1033 
l 4109 1032 l 4127 1032 l 4144 1031 l + 4161 1031 l 4177 1031 l 4193 1031 l 4209 1031 l 4225 1031 l + 4241 1031 l 4257 1032 l 4273 1032 l 4289 1033 l 4304 1034 l + 4320 1035 l 4337 1037 l 4354 1039 l 4371 1041 l 4387 1044 l + 4403 1047 l 4419 1050 l 4435 1053 l 4450 1057 l 4466 1060 l + 4481 1063 l 4497 1067 l 4513 1071 l 4529 1075 l 4545 1080 l + 4561 1085 l 4577 1091 l 4592 1097 l 4607 1103 l 4622 1110 l + 4637 1118 l 4651 1125 l 4666 1132 l 4681 1140 l 4697 1147 l + 4713 1153 l 4731 1159 l 4750 1165 l 4770 1170 l 4787 1174 l + 4804 1177 l 4823 1180 l 4842 1182 l 4863 1184 l 4884 1186 l + 4906 1188 l 4928 1189 l 4950 1190 l 4972 1192 l 4994 1193 l + 5016 1195 l 5037 1197 l 5058 1200 l 5077 1203 l 5096 1206 l + 5113 1210 l 5130 1215 l 5148 1221 l 5165 1228 l 5181 1235 l + 5197 1242 l 5212 1250 l 5228 1259 l 5243 1267 l 5257 1276 l + 5272 1285 l 5286 1294 l 5299 1303 l 5312 1312 l 5324 1322 l + 5336 1331 l 5346 1340 l 5355 1350 l 5365 1363 l 5373 1378 l + 5380 1392 l 5386 1408 l 5390 1424 l 5394 1440 l 5398 1456 l + 5401 1472 l 5402 1488 l 5403 1502 l 5403 1517 l 5400 1530 l + 5395 1543 l 5389 1555 l 5381 1568 l 5372 1580 l 5363 1592 l + 5354 1604 l 5343 1616 l 5331 1627 l 5318 1638 l 5303 1648 l + 5286 1657 l 5265 1665 l 5251 1669 l 5235 1673 l 5219 1677 l + 5201 1680 l 5182 1683 l 5162 1685 l 5141 1688 l 5119 1690 l + 5097 1692 l 5075 1694 l 5053 1696 l 5030 1697 l 5008 1699 l + 4986 1701 l 4964 1703 l 4943 1704 l 4921 1706 l 4901 1707 l + 4880 1709 l 4860 1710 l 4840 1711 l 4819 1712 l 4799 1713 l + 4779 1713 l 4758 1713 l 4738 1714 l 4717 1714 l 4697 1714 l + 4676 1714 l 4655 1714 l 4635 1714 l 4614 1714 l 4594 1714 l + 4573 1714 l 4553 1713 l 4533 1713 l 4513 1713 l 4494 1712 l + 4474 1711 l 4455 1710 l 4434 1709 l 4413 1707 l 4392 1705 l + 4372 1703 l 4351 1701 l 4331 1698 l 4311 1695 l 4291 1692 l + 4271 1690 l 4251 1687 l 4231 1684 l 4211 1681 l 4191 1678 l + 4172 1675 l 4152 1673 l 4133 1670 l 4114 1668 l 4095 1665 l + 4074 1662 l 4053 1659 l 4033 1657 l 
4012 1654 l 3992 1651 l + 3972 1648 l 3951 1645 l 3931 1643 l 3911 1640 l 3891 1637 l + 3872 1634 l 3852 1631 l 3833 1628 l 3815 1626 l 3797 1623 l + 3780 1620 l 3761 1617 l 3743 1614 l 3725 1611 l 3708 1608 l + 3692 1605 l 3675 1602 l 3659 1600 l 3643 1597 l 3627 1594 l + 3612 1591 l 3597 1587 l 3582 1584 l 3568 1580 l 3555 1575 l + 3541 1569 l 3527 1563 l 3514 1556 l 3501 1550 l 3489 1543 l + 3477 1536 l 3465 1529 l 3454 1522 l 3444 1514 l 3435 1505 l + 3427 1496 l 3420 1485 l 3415 1473 l 3412 1460 l 3411 1445 l + 3410 1430 l 3411 1414 l 3412 1397 l 3413 1380 l 3414 1364 l + 3416 1348 l 3417 1333 l 3418 1318 l 3420 1305 l 3423 1290 l + 3425 1275 l 3428 1261 l 3431 1247 l 3434 1233 l 3437 1220 l + 3442 1207 l 3447 1194 l 3455 1182 l 3465 1170 l 3474 1162 l + 3483 1155 l 3493 1148 l 3504 1141 l 3515 1134 l 3526 1127 l + 3538 1121 l 3550 1114 l 3563 1108 l 3577 1102 l 3591 1096 l + 3607 1090 l 3625 1085 l cp gs col0 s gr +% Polyline +n 2475 1215 m 2477 1217 l 2482 1221 l 2491 1229 l 2503 1239 l 2517 1252 l + 2534 1267 l 2552 1282 l 2570 1296 l 2588 1310 l 2605 1322 l + 2621 1332 l 2638 1342 l 2655 1350 l 2669 1356 l 2684 1362 l + 2700 1368 l 2717 1374 l 2734 1380 l 2752 1386 l 2770 1392 l + 2789 1398 l 2808 1403 l 2827 1409 l 2846 1415 l 2865 1420 l + 2884 1425 l 2902 1429 l 2920 1433 l 2937 1436 l 2954 1438 l + 2970 1440 l 2988 1441 l 3006 1441 l 3024 1440 l 3041 1439 l + 3059 1437 l 3076 1434 l 3094 1431 l 3111 1428 l 3129 1425 l + 3146 1421 l 3162 1417 l 3179 1414 l 3195 1409 l 3211 1405 l + 3226 1400 l 3240 1395 l 3256 1388 l 3271 1380 l 3287 1370 l + 3304 1358 l 3322 1344 l 3340 1329 l 3359 1314 l 3376 1299 l + 3391 1286 l 3404 1275 l 3412 1267 l 3418 1262 l 3420 1260 l gs col0 s gr +% Polyline +n 1125 3060 m 1126 3063 l 1127 3068 l 1129 3078 l 1132 3093 l 1136 3112 l + 1141 3135 l 1146 3162 l 1153 3190 l 1159 3219 l 1166 3248 l + 1173 3275 l 1180 3301 l 1187 3324 l 1193 3345 l 1200 3364 l + 1207 3381 l 1215 3397 l 1224 3414 l 1234 3429 l 1245 3444 l + 1256 
3459 l 1267 3473 l 1279 3486 l 1291 3499 l 1304 3512 l + 1316 3525 l 1329 3537 l 1342 3550 l 1355 3562 l 1368 3574 l + 1382 3585 l 1396 3596 l 1410 3607 l 1425 3617 l 1441 3626 l + 1457 3635 l 1473 3644 l 1490 3653 l 1507 3661 l 1524 3669 l + 1542 3677 l 1559 3685 l 1577 3692 l 1595 3700 l 1613 3706 l + 1631 3713 l 1649 3718 l 1668 3723 l 1687 3727 l 1704 3730 l + 1723 3732 l 1743 3733 l 1764 3734 l 1788 3734 l 1814 3733 l + 1841 3732 l 1869 3731 l 1898 3729 l 1926 3727 l 1952 3725 l + 1975 3724 l 1993 3722 l 2008 3721 l 2017 3721 l 2022 3720 l + 2025 3720 l gs col0 s gr +/Helvetica-iso ff 180.00 scf sf +3600 1260 m +gs 1 -1 sc (attributes:) col0 sh gr +/Helvetica-iso ff 180.00 scf sf +3600 1485 m +gs 1 -1 sc ("att" -> Value "apple") col0 sh gr +/Helvetica-iso ff 180.00 scf sf +2250 3780 m +gs 1 -1 sc (attributes:) col0 sh gr +/Helvetica-Oblique-iso ff 180.00 scf sf +390 4725 m +gs 1 -1 sc (An orangeCherries) col0 sh gr +/Helvetica-iso ff 180.00 scf sf +2250 4005 m +gs 1 -1 sc ("att" -> Value "orange") col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +1815 3015 m +gs 1 -1 sc ("Cherries") col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +375 4125 m +gs 1 -1 sc ("An orange") col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +750 2985 m +gs 1 -1 sc () col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +1410 2085 m +gs 1 -1 sc () col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +2790 2070 m +gs 1 -1 sc () col0 sh gr +/Helvetica-Bold-iso ff 180.00 scf sf +2100 1200 m +gs 1 -1 sc () col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 2578 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 2578 a 357 x Fv(Only)g(elements,)g(data)g +(sections,)g(attrib)n(utes)g(and)g(processing)e(instructions)i(\(and)f +(comments,)g(if)h(con\002gured\))e(can,)396 3043 y(directly)i(or)g +(indirectly)-5 b(,)18 b(occur)h(in)h(the)h(document)d(tree.)i(It)g(is)h +(impossible)f(to)g(add)g(entity)g(references)f(to)h(the)g(tree;)g(if) +396 3151 
y(the)g(parser)g(\002nds)g(such)g(a)h(reference,)d(not)i(the)g +(reference)f(as)i(such)f(b)n(ut)g(the)g(referenced)e(te)o(xt)i(\(i.e.)g +(the)g(tree)396 3259 y(representing)e(the)j(structured)d(te)o(xt\))i +(is)h(included)e(in)h(the)g(tree.)396 3409 y(Note)g(that)h(the)f +(parser)f(collapses)i(as)g(much)e(data)h(material)g(into)g(one)f(data)h +(node)f(as)i(possible)f(such)g(that)g(there)g(are)396 +3517 y(normally)f(ne)n(v)o(er)g(tw)o(o)h(adjacent)f(data)i(nodes.)e +(This)h(in)m(v)n(ariant)f(is)i(enforced)d(e)n(v)o(en)h(if)i(data)f +(material)f(is)j(included)c(by)396 3625 y(entity)i(references)f(or)h +(CD)m(A)-9 b(T)h(A)20 b(sections,)g(or)g(if)h(a)f(data)g(sequence)f(is) +j(interrupted)c(by)h(comments.)g(So)i Fq(a)44 b(&)g(b)396 +3732 y Fo(<)p Fq(-)h(comment)e(-)p Fo(>)i Fq(c)f Fo(<)p +Fq(![CDATA[)g Fo(<>)g Fq(d]])p Fo(>)20 b Fv(is)h(represented)d(by)i +(only)g(one)f(data)h(node,)f(for)h(instance.)396 3840 +y(Ho)n(we)n(v)o(er)m(,)e(you)i(can)g(create)g(document)e(trees)i +(manually)f(which)h(break)f(this)i(in)m(v)n(ariant;)d(it)j(is)g(only)f +(the)g(w)o(ay)g(the)396 3948 y(parser)g(forms)f(the)h(tree.)p +Black 3800 5278 a Fr(50)p Black eop +%%Page: 51 51 +51 50 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-2.)f +(Nodes)h(ar)o(e)g(doubly)g(link)o(ed)i(tr)o(ees)396 1537 +y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 1537 a @beginspecial 0 @llx 0 @lly 138 @urx 93 +@ury 1380 @rwi @setspecial +%%BeginDocument: pic/node_general.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/node_general.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 138 93 +%%Pages: 0 +%%BeginSetup +%%EndSetup +%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 
0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-22.0 126.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 
roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 3487 m -1000 -1000 l 4162 -1000 l 4162 3487 l cp clip + 0.05039 0.05039 sc +7.500 slw +% Ellipse +n 2025 2025 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1350 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2700 2025 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2025 1125 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Polyline +gs clippath +1743 1345 m 1845 1275 l 1788 1385 l 1877 1284 l 1832 1244 l cp +clip +n 1380 1800 m 1845 1275 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1743 1345 m 1845 1275 l 1788 1385 l 1765 1365 l 1743 1345 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +1384 1745 m 1282 1815 l 1339 1705 l 1250 1807 l 1295 1846 l cp +clip +n 1815 1207 m 1282 1815 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1384 1745 m 1282 1815 l 1339 1705 l 1361 1725 l 1384 1745 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2025 1470 m 2055 1350 l 2085 1470 l 2085 1335 l 2025 1335 l cp +clip +n 2055 1792 m 2055 1350 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 2025 1470 m 2055 1350 l 2085 1470 l 2055 1470 l 2025 1470 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2010 1687 m 1980 1807 l 1950 1687 l 1950 1822 l 2010 1822 l cp 
+clip +n 1980 1350 m 1980 1807 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 2010 1687 m 1980 1807 l 1950 1687 l 1980 1687 l 2010 1687 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2511 1750 m 2550 1867 l 2461 1782 l 2533 1896 l 2583 1864 l cp +clip +n 2190 1297 m 2550 1867 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 2511 1750 m 2550 1867 l 2461 1782 l 2486 1766 l 2511 1750 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2262 1353 m 2220 1237 l 2312 1320 l 2237 1208 l 2187 1241 l cp +clip +n 2602 1807 m 2220 1237 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 2262 1353 m 2220 1237 l 2312 1320 l 2287 1337 l 2262 1353 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +n 450 675 m 3150 675 l 3150 2475 l 450 2475 l cp gs col0 s gr +/Courier ff 150.00 scf sf +2377 1342 m +gs 1 -1 sc (parent) col0 sh gr +/Courier ff 150.00 scf sf +645 1628 m +gs 1 -1 sc (sub_nodes) col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 1537 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 1537 a 357 x Fv(The)e(node)f(tree)h(has)h +(links)f(in)g(both)g(directions:)f(Ev)o(ery)g(node)g(has)h(a)h(link)f +(to)g(its)i(parent)d(\(if)h(an)o(y\),)f(and)g(it)i(has)g(links)f(to)396 +2002 y(the)g(subnodes)f(\(see)i(\002gure)e Fr(Nodes)h(ar)m(e)h(doubly)d +(link)o(ed)i(tr)m(ees)p Fv(\).)h(Ob)o(viously)-5 b(,)18 +b(this)i(doubly-link)o(ed)d(structure)396 2110 y(simpli\002es)k(the)f +(na)n(vigation)e(in)j(the)f(tree;)g(b)n(ut)g(has)h(also)f(some)g +(consequences)f(for)g(the)h(possible)g(operations)f(on)h(trees.)396 +2259 y(Because)h(e)n(v)o(ery)d(node)i(must)g(ha)n(v)o(e)f(at)i(most)f +Fr(one)g Fv(parent)f(node,)g(operations)g(are)h(ille)o(gal)g(if)g(the)o +(y)f(violate)h(this)396 2367 y(condition.)e(The)i(follo)n(wing)f +(\002gure)g(\()p Fr(A)h(node)g(can)f(only)h(be)g(added)f(if)i(it)g(is)g +(a)f(r)l(oot)q Fv(\))g(sho)n(ws)h(on)e(the)i(left)f(side)h(that)f(node) +396 2475 y Fq(y)h 
Fv(is)g(added)e(to)h Fq(x)h Fv(as)g(ne)n(w)f(subnode) +e(which)i(is)h(allo)n(wed)f(because)f Fq(y)i Fv(does)f(not)g(ha)n(v)o +(e)f(a)i(parent)e(yet.)h(The)g(right)f(side)i(of)396 +2583 y(the)f(picture)g(illustrates)g(what)h(w)o(ould)e(happen)g(if)h +Fq(y)h Fv(had)e(a)i(parent)e(node;)g(this)i(is)g(ille)o(gal)f(because)f +Fq(y)i Fv(w)o(ould)e(ha)n(v)o(e)h(tw)o(o)396 2691 y(parents)g(after)g +(the)g(operation.)396 2923 y Fu(Figur)o(e)g(3-3.)f(A)i(node)f(can)g +(only)g(be)h(added)g(if)f(it)h(is)g(a)f(r)o(oot)396 4165 +y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 4165 a @beginspecial 0 @llx 0 @lly 422 @urx 127 +@ury 4220 @rwi @setspecial +%%BeginDocument: pic/node_add.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/node_add.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 422 127 +%%Pages: 0 +%%BeginSetup +%%EndSetup +%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def 
+/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-33.0 171.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 4387 m -1000 -1000 l 10012 -1000 l 10012 4387 l cp clip + 0.05039 0.05039 sc +7.500 slw +% Ellipse +n 6141 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 6141 2250 
242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 5426 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 6856 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 7571 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 8524 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 8047 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1866 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1866 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1151 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2581 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 3296 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 4249 2925 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 3772 2250 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 8325 1350 242 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Polyline +gs clippath +5507 1945 m 5402 2017 l 5460 1904 l 5369 2008 l 5415 2049 l cp +clip +n 5910 1440 m 5402 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 5507 1945 m 5402 2017 l 5460 1904 l 5484 1924 l 5507 1945 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6134 1902 m 6101 2025 l 6072 1901 l 6070 2039 l 6132 2041 l cp +clip +n 6109 1590 m 6101 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 6134 1902 m 6101 2025 l 6072 1901 l 6103 1901 l 6134 1902 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6649 1952 m 6697 2070 l 6599 1989 l 6681 2100 l 6731 2064 l cp +clip +n 6307 1537 m 6697 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 6649 1952 m 6697 2070 l 6599 1989 l 6624 1970 l 6649 1952 l cp gs 0.00 
setgray ef gr col0 s +% Polyline +gs clippath +7696 2606 m 7602 2692 l 7645 2572 l 7568 2687 l 7619 2722 l cp +clip +n 7832 2347 m 7602 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 7696 2606 m 7602 2692 l 7645 2572 l 7671 2589 l 7696 2606 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8306 2632 m 8349 2752 l 8255 2666 l 8332 2782 l 8383 2747 l cp +clip +n 8150 2452 m 8349 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 8306 2632 m 8349 2752 l 8255 2666 l 8281 2649 l 8306 2632 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +5853 1564 m 5958 1492 l 5899 1605 l 5991 1501 l 5945 1460 l cp +clip +n 5490 2017 m 5958 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 5853 1564 m 5958 1492 l 5899 1605 l 5876 1584 l 5853 1564 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6140 1698 m 6173 1575 l 6201 1699 l 6204 1561 l 6142 1559 l cp +clip +n 6164 2010 m 6173 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 6140 1698 m 6173 1575 l 6201 1699 l 6170 1699 l 6140 1698 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6404 1588 m 6355 1470 l 6454 1551 l 6371 1440 l 6321 1476 l cp +clip +n 6768 2025 m 6355 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 6404 1588 m 6355 1470 l 6454 1551 l 6429 1569 l 6404 1588 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +7784 2499 m 7880 2415 l 7835 2534 l 7914 2420 l 7863 2385 l cp +clip +n 7673 2715 m 7880 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 7784 2499 m 7880 2415 l 7835 2534 l 7810 2517 l 7784 2499 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +8263 2535 m 8222 2415 l 8315 2502 l 8240 2386 l 8188 2419 l cp +clip +n 8412 2707 m 8222 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 8263 2535 m 8222 2415 l 8315 2502 l 8289 2519 l 8263 2535 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +1232 1945 m 1127 2017 l 1185 1904 l 1094 2008 l 
1140 2049 l cp +clip +n 1635 1440 m 1127 2017 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1232 1945 m 1127 2017 l 1185 1904 l 1209 1924 l 1232 1945 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +1859 1902 m 1826 2025 l 1797 1901 l 1795 2039 l 1857 2041 l cp +clip +n 1834 1590 m 1826 2025 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1859 1902 m 1826 2025 l 1797 1901 l 1828 1902 l 1859 1902 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2374 1952 m 2422 2070 l 2324 1989 l 2406 2100 l 2456 2064 l cp +clip +n 2032 1537 m 2422 2070 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 2374 1952 m 2422 2070 l 2324 1989 l 2349 1970 l 2374 1952 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +3421 2606 m 3327 2692 l 3370 2572 l 3293 2687 l 3344 2722 l cp +clip +n 3557 2347 m 3327 2692 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 3421 2606 m 3327 2692 l 3370 2572 l 3396 2589 l 3421 2606 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +4031 2632 m 4074 2752 l 3980 2666 l 4057 2782 l 4108 2747 l cp +clip +n 3875 2452 m 4074 2752 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 4031 2632 m 4074 2752 l 3980 2666 l 4006 2649 l 4031 2632 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +1578 1564 m 1683 1492 l 1624 1605 l 1716 1501 l 1670 1460 l cp +clip +n 1215 2017 m 1683 1492 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1578 1564 m 1683 1492 l 1624 1605 l 1601 1584 l 1578 1564 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +1865 1698 m 1898 1575 l 1926 1699 l 1929 1561 l 1867 1559 l cp +clip +n 1889 2010 m 1898 1575 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 1865 1698 m 1898 1575 l 1926 1699 l 1895 1698 l 1865 1698 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2129 1588 m 2080 1470 l 2179 1551 l 2096 1440 l 2046 1476 l cp +clip +n 2493 2025 m 2080 1470 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% 
arrowhead +n 2129 1588 m 2080 1470 l 2179 1551 l 2154 1569 l 2129 1588 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +3509 2499 m 3605 2415 l 3560 2534 l 3639 2420 l 3588 2385 l cp +clip +n 3398 2715 m 3605 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 3509 2499 m 3605 2415 l 3560 2534 l 3535 2517 l 3509 2499 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +3988 2535 m 3947 2415 l 4040 2502 l 3965 2386 l 3913 2419 l cp +clip +n 4137 2707 m 3947 2415 l gs col7 0.75 shd ef gr gs col0 s gr gr + +% arrowhead +n 3988 2535 m 3947 2415 l 4040 2502 l 4014 2519 l 3988 2535 l cp gs col7 1.00 shd ef gr col0 s +% Polyline + [60] 0 sd +n 6387 1372 m 8023 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd +% Polyline +n 4950 900 m 9000 900 l 9000 3375 l 4950 3375 l cp gs col0 s gr +% Polyline + [60] 0 sd +n 2112 1372 m 3748 2017 l gs col7 0.75 shd ef gr gs col0 s gr [] 0 sd +% Polyline +n 675 900 m 4725 900 l 4725 3375 l 675 3375 l cp gs col0 s gr +% Polyline +gs clippath +8119 1904 m 8055 2010 l 8061 1886 l 8022 2016 l 8079 2033 l cp +clip +n 8197 1545 m 8055 2010 l gs col0 s gr gr + +% arrowhead +n 8119 1904 m 8055 2010 l 8061 1886 l 8090 1895 l 8119 1904 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8214 1695 m 8280 1590 l 8271 1713 l 8313 1585 l 8256 1566 l cp +clip +n 8137 2025 m 8280 1590 l gs col0 s gr gr + +% arrowhead +n 8214 1695 m 8280 1590 l 8271 1713 l 8243 1704 l 8214 1695 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +30.000 slw +gs clippath +7687 2205 m 7502 2333 l 7594 2129 l 7410 2351 l 7503 2428 l cp +clip +n 7875 1500 m 7620 1965 l 7845 1920 l 7485 2355 l gs col0 s gr gr + +% arrowhead +15.000 slw +n 7687 2205 m 7502 2333 l 7594 2129 l 7618 2195 l 7687 2205 l cp gs 0.00 setgray ef gr col0 s +/Courier-Bold ff 195.00 scf sf +6094 1379 m +gs 1 -1 sc (x) col0 sh gr +/Courier-Bold ff 195.00 scf sf +7991 2265 m +gs 1 -1 sc (y) col0 sh gr +/Courier-Bold ff 195.00 scf sf +1819 1379 m +gs 1 -1 sc (x) col0 sh 
gr +/Courier-Bold ff 195.00 scf sf +3716 2265 m +gs 1 -1 sc (y) col0 sh gr +/Courier ff 180.00 scf sf +6459 1335 m +gs 1 -1 sc (x # add_node y) col0 sh gr +/Courier ff 180.00 scf sf +2214 1365 m +gs 1 -1 sc (x # add_node y) col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 4165 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 4165 a 357 x Fv(The)g("delete")g(operation) +e(simply)i(remo)o(v)o(es)f(the)h(links)g(between)f(tw)o(o)i(nodes.)e +(In)h(the)g(picture)f(\()p Fr(A)i(deleted)e(node)396 +4629 y(becomes)h(the)g(r)l(oot)g(of)h(the)f(subtr)m(ee)p +Fv(\))g(the)g(node)f Fq(x)i Fv(is)g(deleted)e(from)h(the)g(list)h(of)f +(subnodes)f(of)h Fq(y)p Fv(.)g(After)g(that,)g Fq(x)396 +4737 y Fv(becomes)g(the)g(root)f(of)h(the)g(subtree)g(starting)g(at)g +(this)h(node.)p Black 3800 5278 a Fr(51)p Black eop +%%Page: 52 52 +52 51 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-4.)f(A)i +(deleted)f(node)g(becomes)h(the)f(r)o(oot)f(of)h(the)g(subtr)o(ee)396 +1912 y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 1912 a @beginspecial 0 @llx 0 @lly 388 @urx +138 @ury 3880 @rwi @setspecial +%%BeginDocument: pic/node_delete.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/node_delete.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 388 138 +%%Pages: 0 +%%BeginSetup +%%EndSetup +%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def 
+/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-78.0 205.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def 
+ /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip + 0.05039 0.05039 sc +7.500 slw +% Ellipse +n 2700 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Polyline +gs clippath +2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp +clip +n 2535 2857 m 2235 3360 l gs col0 s gr gr + +% arrowhead +n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp +clip +n 2782 2932 m 3000 3420 l gs col0 s gr gr + +% arrowhead +n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 2882 l cp +clip +n 2317 3367 m 2587 2910 l gs col0 s gr gr + +% arrowhead +n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp +clip +n 3060 3375 m 2842 2887 l gs col0 s gr gr + +% arrowhead +n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s +% Ellipse +n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% 
Ellipse +n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Polyline +gs clippath +2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp +clip +n 2737 2460 m 2737 2032 l gs col0 s gr gr + +% arrowhead +n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp +clip +n 2662 2032 m 2662 2467 l gs col0 s gr gr + +% arrowhead +n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s +% Polyline +1 slj +60.000 slw +n 4050 2610 m 4725 2610 l gs col0 s gr +% Polyline +n 4050 2745 m 4725 2745 l gs col0 s gr +% Polyline +1 slc +n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr +% Polyline +0 slj +0 slc +7.500 slw +gs clippath +2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp +clip +n 2490 1905 m 2025 2467 l gs col0 s gr gr + +% arrowhead +n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp +clip +n 2827 2002 m 3202 2542 l gs col0 s gr gr + +% arrowhead +n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp +clip +n 2115 2475 m 2535 1965 l gs col0 s gr gr + +% arrowhead +n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s +% Polyline 
+gs clippath +2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp +clip +n 3255 2505 m 2872 1957 l gs col0 s gr gr + +% arrowhead +n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp +clip +n 6135 1905 m 5670 2467 l gs col0 s gr gr + +% arrowhead +n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp +clip +n 6472 2002 m 6847 2542 l gs col0 s gr gr + +% arrowhead +n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6081 2039 m 6180 1965 l 6127 2077 l 6213 1972 l 6166 1934 l cp +clip +n 5760 2475 m 6180 1965 l gs col0 s gr gr + +% arrowhead +n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp +clip +n 6900 2505 m 6517 1957 l gs col0 s gr gr + +% arrowhead +n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp +clip +n 8160 1957 m 7860 2460 l gs col0 s gr gr + +% arrowhead +n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp +clip +n 8407 2032 m 8625 2520 l gs col0 s gr gr + +% arrowhead +n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp +clip +n 7942 2467 m 8212 2010 l gs col0 s gr gr + +% arrowhead +n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs 
clippath +8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp +clip +n 8685 2475 m 8467 1987 l gs col0 s gr gr + +% arrowhead +n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s +/Courier ff 180.00 scf sf +3960 2250 m +gs 1 -1 sc (x # delete) col0 sh gr +% Polyline +1 slj +1 slc +45.000 slw +n 2595 2362 m 2820 2137 l gs col0 s gr +% Polyline +n 2595 2137 m 2820 2362 l gs col0 s gr +% Polyline +0 slj +0 slc +7.500 slw +n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr +/Courier-Bold ff 180.00 scf sf +2640 2752 m +gs 1 -1 sc (x) col0 sh gr +/Courier-Bold ff 180.00 scf sf +8280 1845 m +gs 1 -1 sc (x) col0 sh gr +/Courier-Bold ff 180.00 scf sf +2655 1845 m +gs 1 -1 sc (y) col0 sh gr +/Courier-Bold ff 180.00 scf sf +6300 1845 m +gs 1 -1 sc (y) col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 1912 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 1912 a 357 x Fv(It)g(is)h(also)e(possible)h +(to)f(mak)o(e)h(a)g(clone)e(of)i(a)g(subtree;)f(illustrated)g(in)h +Fr(The)f(clone)g(of)h(a)f(subtr)m(ee)p Fv(.)h(In)f(this)h(case,)g(the)f +(clone)396 2377 y(is)i(a)g(cop)o(y)e(of)h(the)g(original)f(subtree)h(e) +o(xcept)f(that)h(it)h(is)h(no)d(longer)g(a)i(subnode.)d(Because)i +(cloning)f(ne)n(v)o(er)g(k)o(eeps)h(the)396 2485 y(connection)e(to)j +(the)f(parent,)f(the)h(clones)g(are)g(called)g Fr(orphaned)r +Fv(.)396 2717 y Fu(Figur)o(e)g(3-5.)f(The)i(clone)f(of)g(a)g(subtr)o +(ee)396 4050 y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 4050 a @beginspecial 0 @llx 0 @lly +388 @urx 138 @ury 3880 @rwi @setspecial +%%BeginDocument: pic/node_clone.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/node_clone.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 388 138 +%%Pages: 0 +%%BeginSetup +%%EndSetup 
+%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-78.0 205.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff 
{findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + /y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 5062 m -1000 -1000 l 10237 -1000 l 10237 5062 l cp clip + 0.05039 0.05039 sc +7.500 slw +% Ellipse +n 2700 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2025 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 3375 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 6345 1800 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 5670 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 7020 2700 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 8325 1800 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 7875 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 8775 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 6345 2700 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 5895 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 6795 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 2700 2700 229 229 0 
360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 2250 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 3150 3600 229 229 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Polyline +1 slj +60.000 slw +n 4050 2610 m 4725 2610 l gs col0 s gr +% Polyline +n 4050 2745 m 4725 2745 l gs col0 s gr +% Polyline +1 slc +n 4500 2385 m 4950 2655 l 4500 2970 l gs col0 s gr +% Polyline +0 slj +0 slc +7.500 slw +gs clippath +2125 2394 m 2025 2467 l 2078 2355 l 1992 2459 l 2039 2498 l cp +clip +n 2490 1905 m 2025 2467 l gs col0 s gr gr + +% arrowhead +n 2125 2394 m 2025 2467 l 2078 2355 l 2101 2375 l 2125 2394 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +3158 2426 m 3202 2542 l 3109 2461 l 3186 2571 l 3235 2537 l cp +clip +n 2827 2002 m 3202 2542 l gs col0 s gr gr + +% arrowhead +n 3158 2426 m 3202 2542 l 3109 2461 l 3134 2443 l 3158 2426 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2436 2039 m 2535 1965 l 2482 2077 l 2568 1972 l 2521 1934 l cp +clip +n 2115 2475 m 2535 1965 l gs col0 s gr gr + +% arrowhead +n 2436 2039 m 2535 1965 l 2482 2077 l 2459 2058 l 2436 2039 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2916 2073 m 2872 1957 l 2965 2038 l 2888 1928 l 2839 1962 l cp +clip +n 3255 2505 m 2872 1957 l gs col0 s gr gr + +% arrowhead +n 2916 2073 m 2872 1957 l 2965 2038 l 2941 2055 l 2916 2073 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +5770 2394 m 5670 2467 l 5723 2355 l 5637 2459 l 5684 2498 l cp +clip +n 6135 1905 m 5670 2467 l gs col0 s gr gr + +% arrowhead +n 5770 2394 m 5670 2467 l 5723 2355 l 5746 2375 l 5770 2394 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6803 2426 m 6847 2542 l 6754 2461 l 6831 2571 l 6880 2537 l cp +clip +n 6472 2002 m 6847 2542 l gs col0 s gr gr + +% arrowhead +n 6803 2426 m 6847 2542 l 6754 2461 l 6779 2443 l 6803 2426 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6081 2039 m 6180 1965 l 6127 2077 
l 6213 1972 l 6166 1934 l cp +clip +n 5760 2475 m 6180 1965 l gs col0 s gr gr + +% arrowhead +n 6081 2039 m 6180 1965 l 6127 2077 l 6104 2058 l 6081 2039 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6561 2073 m 6517 1957 l 6610 2038 l 6533 1928 l 6484 1962 l cp +clip +n 6900 2505 m 6517 1957 l gs col0 s gr gr + +% arrowhead +n 6561 2073 m 6517 1957 l 6610 2038 l 6586 2055 l 6561 2073 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +7947 2372 m 7860 2460 l 7896 2342 l 7827 2458 l 7878 2488 l cp +clip +n 8160 1957 m 7860 2460 l gs col0 s gr gr + +% arrowhead +n 7947 2372 m 7860 2460 l 7896 2342 l 7921 2357 l 7947 2372 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8603 2398 m 8625 2520 l 8549 2423 l 8604 2546 l 8659 2521 l cp +clip +n 8407 2032 m 8625 2520 l gs col0 s gr gr + +% arrowhead +n 8603 2398 m 8625 2520 l 8549 2423 l 8576 2410 l 8603 2398 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +8125 2098 m 8212 2010 l 8177 2129 l 8245 2012 l 8194 1982 l cp +clip +n 7942 2467 m 8212 2010 l gs col0 s gr gr + +% arrowhead +n 8125 2098 m 8212 2010 l 8177 2129 l 8151 2113 l 8125 2098 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +8489 2109 m 8467 1987 l 8543 2084 l 8488 1961 l 8433 1986 l cp +clip +n 8685 2475 m 8467 1987 l gs col0 s gr gr + +% arrowhead +n 8489 2109 m 8467 1987 l 8543 2084 l 8516 2097 l 8489 2109 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6352 2152 m 6382 2032 l 6412 2152 l 6412 2017 l 6352 2017 l cp +clip +n 6382 2460 m 6382 2032 l gs col0 s gr gr + +% arrowhead +n 6352 2152 m 6382 2032 l 6412 2152 l 6382 2152 l 6352 2152 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6337 2347 m 6307 2467 l 6277 2347 l 6277 2482 l 6337 2482 l cp +clip +n 6307 2032 m 6307 2467 l gs col0 s gr gr + +% arrowhead +n 6337 2347 m 6307 2467 l 6277 2347 l 6307 2347 l 6337 2347 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +5967 3272 m 5880 3360 l 5916 3242 l 5847 
3358 l 5898 3388 l cp +clip +n 6180 2857 m 5880 3360 l gs col0 s gr gr + +% arrowhead +n 5967 3272 m 5880 3360 l 5916 3242 l 5941 3257 l 5967 3272 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6623 3298 m 6645 3420 l 6569 3323 l 6624 3446 l 6679 3421 l cp +clip +n 6427 2932 m 6645 3420 l gs col0 s gr gr + +% arrowhead +n 6623 3298 m 6645 3420 l 6569 3323 l 6596 3310 l 6623 3298 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +6145 2998 m 6232 2910 l 6197 3029 l 6265 2912 l 6214 2882 l cp +clip +n 5962 3367 m 6232 2910 l gs col0 s gr gr + +% arrowhead +n 6145 2998 m 6232 2910 l 6197 3029 l 6171 3013 l 6145 2998 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +6509 3009 m 6487 2887 l 6563 2984 l 6508 2861 l 6453 2886 l cp +clip +n 6705 3375 m 6487 2887 l gs col0 s gr gr + +% arrowhead +n 6509 3009 m 6487 2887 l 6563 2984 l 6536 2997 l 6509 3009 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2707 2152 m 2737 2032 l 2767 2152 l 2767 2017 l 2707 2017 l cp +clip +n 2737 2460 m 2737 2032 l gs col0 s gr gr + +% arrowhead +n 2707 2152 m 2737 2032 l 2767 2152 l 2737 2152 l 2707 2152 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2692 2347 m 2662 2467 l 2632 2347 l 2632 2482 l 2692 2482 l cp +clip +n 2662 2032 m 2662 2467 l gs col0 s gr gr + +% arrowhead +n 2692 2347 m 2662 2467 l 2632 2347 l 2662 2347 l 2692 2347 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2322 3272 m 2235 3360 l 2271 3242 l 2202 3358 l 2253 3388 l cp +clip +n 2535 2857 m 2235 3360 l gs col0 s gr gr + +% arrowhead +n 2322 3272 m 2235 3360 l 2271 3242 l 2296 3257 l 2322 3272 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2978 3298 m 3000 3420 l 2924 3323 l 2979 3446 l 3034 3421 l cp +clip +n 2782 2932 m 3000 3420 l gs col0 s gr gr + +% arrowhead +n 2978 3298 m 3000 3420 l 2924 3323 l 2951 3310 l 2978 3298 l cp gs 0.00 setgray ef gr col0 s +% Polyline +gs clippath +2500 2998 m 2587 2910 l 2552 3029 l 2620 2912 l 2569 
2882 l cp +clip +n 2317 3367 m 2587 2910 l gs col0 s gr gr + +% arrowhead +n 2500 2998 m 2587 2910 l 2552 3029 l 2526 3013 l 2500 2998 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +gs clippath +2864 3009 m 2842 2887 l 2918 2984 l 2863 2861 l 2808 2886 l cp +clip +n 3060 3375 m 2842 2887 l gs col0 s gr gr + +% arrowhead +n 2864 3009 m 2842 2887 l 2918 2984 l 2891 2997 l 2864 3009 l cp gs col7 1.00 shd ef gr col0 s +% Polyline +n 1575 1350 m 9225 1350 l 9225 4050 l 1575 4050 l cp gs col0 s gr +/Courier-Bold ff 180.00 scf sf +2655 1845 m +gs 1 -1 sc (y) col0 sh gr +/Courier-Bold ff 180.00 scf sf +6300 1845 m +gs 1 -1 sc (y) col0 sh gr +/Courier-Bold ff 180.00 scf sf +6285 2752 m +gs 1 -1 sc (x) col0 sh gr +/Courier-Bold ff 180.00 scf sf +2640 2752 m +gs 1 -1 sc (x) col0 sh gr +/Courier ff 180.00 scf sf +3690 2025 m +gs 1 -1 sc (let x' =) col0 sh gr +/Courier ff 180.00 scf sf +3690 2205 m +gs 1 -1 sc (x # orphaned_clone) col0 sh gr +/Courier-Bold ff 180.00 scf sf +8235 1845 m +gs 1 -1 sc (x') col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 4050 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 4050 a -2 4627 a Fp(3.2.2.)35 +b(The)f(methods)g(of)f(the)h(c)n(lass)h(type)f Fc(node)p +Black 3800 5278 a Fr(52)p Black eop +%%Page: 53 53 +53 52 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(General)g(obser)o(v)o +(ers)g(.)p Black 396 866 a Ft(\225)p Black 60 w Fq(extension)p +Fv(:)g(The)f(reference)g(to)h(the)h(e)o(xtension)d(object)i(which)g +(belongs)f(to)h(this)h(node)e(\(see)h(...\).)p Black +396 974 a Ft(\225)p Black 60 w Fq(dtd)p Fv(:)h(Returns)f(a)g(reference) +f(to)h(the)g(global)g(DTD.)g(All)h(nodes)e(of)h(a)h(tree)f(must)g +(share)g(the)g(same)h(DTD.)p Black 396 1082 a Ft(\225)p +Black 60 w Fq(parent)p Fv(:)f(Get)h(the)f(f)o(ather)f(node.)g(Raises)j +Fq(Not_found)d Fv(in)i(the)f(case)g(the)h(node)e(does)h(not)f(ha)n(v)o 
+(e)h(a)h(parent,)e(i.e.)h(the)479 1190 y(node)f(is)j(the)e(root.)p +Black 396 1298 a Ft(\225)p Black 60 w Fq(root)p Fv(:)g(Gets)h(the)g +(reference)d(to)i(the)h(root)e(node)g(of)h(the)g(tree.)g(Ev)o(ery)f +(node)g(is)i(contained)e(in)h(a)h(tree)f(with)h(a)f(root,)f(so)479 +1406 y(this)h(method)f(al)o(w)o(ays)h(succeeds.)e(Note)i(that)g(this)g +(method)e Fr(sear)m(c)o(hes)h Fv(the)h(root,)e(which)h(costs)h(time)g +(proportional)d(to)479 1514 y(the)j(length)g(of)g(the)g(path)g(to)g +(the)g(root.)p Black 396 1622 a Ft(\225)p Black 60 w +Fq(sub_nodes)p Fv(:)g(Returns)g(references)e(to)j(the)f(children.)f +(The)g(returned)g(list)i(re\003ects)g(the)f(order)f(of)h(the)g +(children.)e(F)o(or)479 1730 y(data)i(nodes,)g(this)g(method)f(returns) +g(the)i(empty)e(list.)p Black 396 1838 a Ft(\225)p Black +60 w Fq(iter_nodes)43 b(f)p Fv(:)21 b(Iterates)f(o)o(v)o(er)f(the)h +(children,)f(and)g(calls)i Fq(f)g Fv(for)e(e)n(v)o(ery)g(child)h(in)g +(turn.)p Black 396 1945 a Ft(\225)p Black 60 w Fq(iter_nodes_sibl)43 +b(f)p Fv(:)20 b(Iterates)g(o)o(v)o(er)f(the)h(children,)f(and)h(calls)g +Fq(f)h Fv(for)f(e)n(v)o(ery)e(child)i(in)h(turn.)e Fq(f)h +Fv(gets)h(as)479 2053 y(ar)o(guments)d(the)j(pre)n(vious)d(node,)h(the) +h(current)f(node,)g(and)h(the)g(ne)o(xt)f(node.)p Black +396 2161 a Ft(\225)p Black 60 w Fq(node_type)p Fv(:)h(Returns)g(either) +f Fq(T_data)h Fv(which)g(means)g(that)g(the)g(node)f(is)i(a)g(data)f +(node,)f(or)h Fq(T_element)43 b(n)479 2269 y Fv(which)20 +b(means)g(that)g(the)g(node)f(is)j(an)e(element)f(of)h(type)g +Fq(n)p Fv(.)g(If)g(con\002gured,)e(possible)i(node)f(types)h(are)g +(also)479 2377 y Fq(T_pinstr)44 b(t)20 b Fv(indicating)f(that)h(the)h +(node)e(represents)g(a)i(processing)e(instruction)g(with)h(tar)o(get)f +Fq(t)p Fv(,)i(and)479 2485 y Fq(T_comment)f Fv(in)g(which)g(case)g(the) +g(node)g(is)h(a)f(comment.)p Black 396 2593 a Ft(\225)p +Black 60 w Fq(encoding)p Fv(:)g(Returns)g(the)g(encoding)e(of)i(the)g +(strings.)p Black 396 2701 a Ft(\225)p Black 
60 w Fq(data)p +Fv(:)g(Returns)g(the)h(character)e(data)h(of)g(this)g(node)f(and)h(all) +h(children,)d(concatenated)h(as)i(one)e(string.)h(The)479 +2809 y(encoding)e(of)i(the)h(string)e(is)j(what)e(the)g(method)f +Fq(encoding)g Fv(returns.)g(-)i(F)o(or)e(data)h(nodes,)g(this)g(method) +f(simply)479 2917 y(returns)h(the)g(represented)e(characters.)h(F)o(or) +h(elements,)g(the)g(meaning)f(of)g(the)i(method)d(has)j(been)e(e)o +(xtended)g(such)479 3025 y(that)i(it)f(returns)g(something)e(useful,)i +(i.e.)g(the)g(ef)n(fecti)n(v)o(ely)f(contained)f(characters,)h(without) +h(markup.)e(\(F)o(or)479 3133 y Fq(T_pinstr)i Fv(and)f +Fq(T_comment)h Fv(nodes,)f(the)h(method)f(returns)g(the)h(empty)g +(string.\))p Black 396 3241 a Ft(\225)p Black 60 w Fq(position)p +Fv(:)g(If)g(con\002gured,)d(this)k(method)e(returns)g(the)h(position)g +(of)g(the)g(element)g(as)g(triple)g(\(entity)-5 b(,)19 +b(line,)479 3349 y(byteposition\).)f(F)o(or)i(data)g(nodes,)f(the)h +(position)g(is)h(not)f(stored.)f(If)h(the)g(position)g(is)h(not)f(a)n +(v)n(ailable)f(the)i(triple)f Fq("?",)479 3456 y(0,)45 +b(0)20 b Fv(is)h(returned.)p Black 396 3564 a Ft(\225)p +Black 60 w Fq(comment)p Fv(:)f(Returns)g Fq(Some)44 b(text)20 +b Fv(for)f(comment)g(nodes,)g(and)g Fq(None)h Fv(for)g(other)f(nodes.)g +(The)h Fq(text)f Fv(is)i(e)n(v)o(erything)479 3672 y(between)f(the)g +(comment)f(delimiters)g Fo(<)p Fq(-)i Fv(and)e Fq(-)p +Fo(>)p Fv(.)p Black 396 3780 a Ft(\225)p Black 60 w Fq(pinstr)44 +b(n)p Fv(:)21 b(Returns)f(all)h(processing)d(instructions)i(that)g(are) +g(directly)f(contained)g(in)h(this)h(element)e(and)h(that)g(ha)n(v)o(e) +479 3888 y(a)h Fr(tar)m(g)o(et)h Fv(speci\002cation)d(of)h +Fq(n)p Fv(.)g(The)g(tar)o(get)f(is)j(the)e(\002rst)h(w)o(ord)e(after)h +(the)g Fo(<)p Fq(?)p Fv(.)p Black 396 3996 a Ft(\225)p +Black 60 w Fq(pinstr_names)p Fv(:)f(Returns)h(the)g(list)i(of)e(all)g +(tar)o(gets)g(of)g(processing)f(instructions)g(directly)g(contained)g +(in)h(this)479 4104 y(element.)p Black 396 4212 a 
Ft(\225)p +Black 60 w Fq(write)44 b(s)h(enc)p Fv(:)20 b(Prints)h(the)f(node)f(and) +h(all)h(subnodes)d(to)j(the)f(passed)g(output)f(stream)h(as)h(v)n(alid) +f(XML)g(te)o(xt,)g(using)479 4320 y(the)g(passed)h(e)o(xternal)e +(encoding.)396 4511 y Fu(Attrib)n(ute)h(obser)o(v)o(ers)h(.)p +Black 396 4743 a Ft(\225)p Black 60 w Fq(attribute)44 +b(n)p Fv(:)20 b(Returns)g(the)h(v)n(alue)e(of)h(the)g(attrib)n(ute)g +(with)g(name)g Fq(n)p Fv(.)g(This)h(method)d(returns)i(a)g(v)n(alue)g +(for)f(e)n(v)o(ery)479 4851 y(declared)g(attrib)n(ute,)h(and)f(it)i +(raises)g Fq(Not_found)e Fv(for)h(an)o(y)f(undeclared)f(attrib)n(ute.)i +(Note)g(that)g(it)h(e)n(v)o(en)e(returns)h(a)p Black +3800 5278 a Fr(53)p Black eop +%%Page: 54 54 +54 53 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 479 579 a Fv(v)n(alue)g(if)g(the)g +(attrib)n(ute)g(is)h(actually)f(missing)g(b)n(ut)g(is)h(declared)e(as)i +Fq(#IMPLIED)f Fv(or)g(has)g(a)h(def)o(ault)e(v)n(alue.)g(-)i(Possible) +479 687 y(v)n(alues)f(are:)p Black 479 919 a Fa(\225)p +Black 62 w Fq(Implied_value)p Fv(:)f(The)h(attrib)n(ute)g(has)g(been)g +(declared)e(with)j(the)f(k)o(e)o(yw)o(ord)e Fq(#IMPLIED)p +Fv(,)i(and)f(the)h(attrib)n(ute)g(is)562 1027 y(missing)g(in)h(the)f +(attrib)n(ute)g(list)h(of)f(this)h(element.)p Black 479 +1135 a Fa(\225)p Black 62 w Fq(Value)44 b(s)p Fv(:)21 +b(The)f(attrib)n(ute)g(has)g(been)g(declared)e(as)j(type)f +Fq(CDATA)p Fv(,)g(as)h Fq(ID)p Fv(,)f(as)h Fq(IDREF)p +Fv(,)e(as)i Fq(ENTITY)p Fv(,)f(or)g(as)562 1243 y Fq(NMTOKEN)p +Fv(,)g(or)g(as)g(enumeration)e(or)i(notation,)f(and)g(one)h(of)g(the)g +(tw)o(o)h(conditions)d(holds:)i(\(1\))g(The)g(attrib)n(ute)562 +1351 y(v)n(alue)g(is)h(present)e(in)i(the)f(attrib)n(ute)g(list)h(in)f +(which)g(case)h(the)f(v)n(alue)f(is)j(returned)c(in)i(the)h(string)e +Fq(s)p Fv(.)i(\(2\))e(The)562 1459 y(attrib)n(ute)h(has)h(been)e +(omitted,)g(and)h(the)g(DTD)g(declared)f(the)i(attrib)n(ute)e(with)i(a) 
+f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)562 1567 +y(v)n(alue)f(is)i(returned)d(in)i Fq(s)p Fv(.)g(-)g(Summarized,)d +Fq(Value)44 b(s)20 b Fv(is)h(returned)d(for)h(non-implied,)e(non-list)i +(attrib)n(ute)g(v)n(alues.)p Black 479 1675 a Fa(\225)p +Black 62 w Fq(Valuelist)44 b(l)p Fv(:)20 b(The)g(attrib)n(ute)g(has)g +(been)g(declared)f(as)i(type)e Fq(IDREFS)p Fv(,)h(as)h +Fq(ENTITIES)p Fv(,)e(or)h(as)h Fq(NMTOKENS)p Fv(,)562 +1783 y(and)f(one)g(of)f(the)i(tw)o(o)f(conditions)f(holds:)h(\(1\))f +(The)h(attrib)n(ute)g(v)n(alue)f(is)i(present)f(in)g(the)h(attrib)n +(ute)e(list)j(in)e(which)562 1891 y(case)h(the)f(space-separated)e(tok) +o(ens)i(of)g(the)g(v)n(alue)g(are)g(returned)e(in)j(the)f(string)g +(list)h Fq(l)p Fv(.)f(\(2\))g(The)g(attrib)n(ute)g(has)562 +1999 y(been)g(omitted,)f(and)h(the)g(DTD)g(declared)f(the)h(attrib)n +(ute)g(with)h(a)f(def)o(ault)g(v)n(alue.)f(The)h(def)o(ault)f(v)n(alue) +h(is)h(returned)562 2107 y(in)g Fq(l)p Fv(.)f(-)g(Summarized,)f +Fq(Valuelist)43 b(l)20 b Fv(is)i(returned)c(for)i(all)g(list-type)g +(attrib)n(ute)g(v)n(alues.)396 2256 y(Note)g(that)h(before)d(the)j +(attrib)n(ute)f(v)n(alue)f(is)i(returned,)d(the)i(v)n(alue)g(is)h +(normalized.)d(This)j(means)e(that)i(ne)n(wlines)e(are)479 +2364 y(con)m(v)o(erted)f(to)i(spaces,)g(and)g(that)g(references)f(to)h +(character)f(entities)i(\(i.e.)f Fq(&#)p Fn(n)p Fq(;)p +Fv(\))g(and)f(general)g(entities)i(\(i.e.)479 2472 y +Fq(&)p Fn(name)p Fq(;)p Fv(\))f(are)g(e)o(xpanded;)e(if)i(necessary)-5 +b(,)19 b(e)o(xpansion)f(is)j(performed)d(recursi)n(v)o(ely)-5 +b(.)479 2621 y(In)20 b(well-formedness)e(mode,)h(there)h(is)h(no)f(DTD) +g(which)g(could)f(declare)h(an)g(attrib)n(ute.)f(Because)i(of)f(this,)g +(e)n(v)o(ery)479 2729 y(occuring)f(attrib)n(ute)g(is)i(considered)e(as) +i(a)f(CD)m(A)-9 b(T)h(A)21 b(attrib)n(ute.)p Black 396 +2879 a Ft(\225)p Black 60 w Fq(required_string_attribute)41 +b(n)p Fv(:)21 b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called) +g(n,)g(or)g(the)g(V)-9 
b(aluelist)20 b(attrib)n(ute)g(as)h(a)479 +2987 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h +(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h +(if)h(the)479 3094 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g +(method)f(will)i(f)o(ail.)g(-)f(This)g(method)f(is)i(con)m(v)o(enient)d +(if)i(you)g(e)o(xpect)f(a)h(non-implied)479 3202 y(and)g(non-list)f +(attrib)n(ute)h(v)n(alue.)p Black 396 3310 a Ft(\225)p +Black 60 w Fq(optional_string_attribute)41 b(n)p Fv(:)21 +b(returns)e(the)h(V)-9 b(alue)20 b(attrib)n(ute)g(called)g(n,)g(or)g +(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)g(as)h(a)479 +3418 y(string)f(where)g(the)g(list)h(elements)f(are)g(separated)f(by)h +(spaces.)g(If)h(the)f(attrib)n(ute)g(v)n(alue)f(is)i(implied,)e(or)h +(if)h(the)479 3526 y(attrib)n(ute)f(does)g(not)g(e)o(xists,)g(the)g +(method)f(returns)h(None.)f(-)h(This)h(method)e(is)i(con)m(v)o(enient)c +(if)k(you)e(e)o(xpect)g(a)i(non-list)479 3634 y(attrib)n(ute)f(v)n +(alue)g(including)e(the)i(implied)g(v)n(alue.)p Black +396 3742 a Ft(\225)p Black 60 w Fq(required_list_attribute)41 +b(n)p Fv(:)20 b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f +(called)g(n,)g(or)g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g +(list)479 3850 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n +(ute)g(v)n(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g +(not)g(e)o(xists,)g(the)g(method)479 3958 y(will)h(f)o(ail.)g(-)f(This) +g(method)f(is)i(con)m(v)o(enient)d(if)i(you)g(e)o(xpect)f(a)h(list)i +(attrib)n(ute)d(v)n(alue.)p Black 396 4066 a Ft(\225)p +Black 60 w Fq(optional_list_attribute)41 b(n)p Fv(:)20 +b(returns)f(the)g(V)-9 b(aluelist)20 b(attrib)n(ute)f(called)g(n,)g(or) +g(the)h(V)-9 b(alue)19 b(attrib)n(ute)g(as)h(a)g(list)479 +4174 y(with)h(a)f(single)g(element.)g(If)g(the)g(attrib)n(ute)g(v)n +(alue)f(is)i(implied,)f(or)g(if)g(the)g(attrib)n(ute)g(does)g(not)g(e)o +(xists,)g(an)g(empty)g(list)479 4282 y(will)h(be)f(returned.)e(-)j 
+(This)f(method)f(is)i(con)m(v)o(enient)d(if)i(you)f(e)o(xpect)h(a)g +(list)i(attrib)n(ute)d(v)n(alue)h(or)g(the)g(implied)f(v)n(alue.)p +Black 396 4390 a Ft(\225)p Black 60 w Fq(attribute_names)p +Fv(:)g(returns)g(the)h(list)h(of)f(all)h(attrib)n(ute)f(names)g(of)g +(this)g(element.)g(As)h(this)f(is)i(a)e(v)n(alidating)479 +4498 y(parser)m(,)f(this)i(list)g(is)g(equal)f(to)g(the)h(list)g(of)f +(declared)f(attrib)n(utes.)p Black 396 4605 a Ft(\225)p +Black 60 w Fq(attribute_type)43 b(n)p Fv(:)20 b(returns)g(the)g(type)g +(of)g(the)g(attrib)n(ute)g(called)g Fq(n)p Fv(.)g(See)h(the)f(module)f +Fq(Pxp_types)g Fv(for)g(a)479 4713 y(description)g(of)h(the)g(encoding) +e(of)i(the)g(types.)p Black 396 4821 a Ft(\225)p Black +60 w Fq(attributes)p Fv(:)f(returns)h(the)g(list)h(of)f(pairs)g(of)g +(names)g(and)g(v)n(alues)g(for)f(all)i(attrib)n(utes)f(of)g(this)h +(element.)p Black 3800 5278 a Fr(54)p Black eop +%%Page: 55 55 +55 54 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p +Black 60 w Fq(id_attribute_name)p Fv(:)e(returns)h(the)i(name)e(of)h +(the)g(attrib)n(ute)g(that)g(is)h(declared)e(with)h(type)g(ID.)g(There) +f(is)i(at)g(most)479 687 y(one)f(such)g(attrib)n(ute.)f(The)h(method)f +(raises)i Fq(Not_found)e Fv(if)i(there)e(is)i(no)f(declared)f(ID)i +(attrib)n(ute)e(for)h(the)g(element)479 795 y(type.)p +Black 396 903 a Ft(\225)p Black 60 w Fq(id_attribute_value)p +Fv(:)e(returns)h(the)i(v)n(alue)e(of)h(the)g(attrib)n(ute)g(that)g(is)h +(declared)e(with)i(type)e(ID.)i(There)e(is)i(at)479 1011 +y(most)g(one)e(such)h(attrib)n(ute.)g(The)g(method)e(raises)j +Fq(Not_found)e Fv(if)i(there)f(is)h(no)e(declared)g(ID)i(attrib)n(ute)f +(for)f(the)479 1119 y(element)h(type.)p Black 396 1226 +a Ft(\225)p Black 60 w Fq(idref_attribute_names)p Fv(:)d(returns)h(the) +h(list)i(of)e(attrib)n(ute)f(names)h(that)h(are)f(declared)f(as)i +(IDREF)f(or)g(IDREFS.)396 1417 y Fu(Modifying)h(methods)h(.)f 
+Fv(The)g(follo)n(wing)f(methods)g(are)h(only)f(de\002ned)g(for)h +(element)f(nodes)h(\(more)f(e)o(xactly:)g(the)396 1525 +y(methods)g(are)i(de\002ned)e(for)g(data)h(nodes,)f(too,)h(b)n(ut)g(f)o +(ail)h(al)o(w)o(ays\).)p Black 396 1758 a Ft(\225)p Black +60 w Fq(add_node)44 b(sn)p Fv(:)20 b(Adds)g(sub)g(node)g +Fq(sn)g Fv(to)g(the)g(list)i(of)e(children.)e(This)j(operation)d(is)j +(illustrated)f(in)g(the)g(picture)g Fr(A)479 1866 y(node)f(can)h(only)g +(be)g(added)f(if)h(it)h(is)h(a)e(r)l(oot)q Fv(.)g(This)h(method)e(e)o +(xpects)g(that)h Fq(sn)h Fv(is)g(a)g(root,)e(and)g(it)i(requires)f +(that)g Fq(sn)g Fv(and)479 1974 y(the)g(current)f(object)h(share)g(the) +g(same)h(DTD.)479 2123 y(Because)g Fq(add_node)e Fv(is)i(the)f(method)f +(the)h(parser)g(itself)h(uses)g(to)f(add)g(ne)n(w)g(nodes)f(to)h(the)h +(tree,)e(it)i(performs)e(by)479 2231 y(def)o(ault)h(some)g(simple)g(v)n +(alidation)f(checks:)g(If)h(the)h(content)e(model)g(is)i(a)g(re)o +(gular)e(e)o(xpression,)f(it)j(is)g(not)f(allo)n(wed)f(to)479 +2339 y(add)h(data)g(nodes)f(to)i(this)g(node)e(unless)h(the)g(ne)n(w)g +(nodes)g(consist)g(only)f(of)h(whitespace.)g(In)g(this)g(case,)h(the)f +(ne)n(w)g(data)479 2447 y(nodes)g(are)g(silently)g(dropped)e(\(you)h +(can)h(change)f(this)h(by)g(in)m(v)n(oking)e Fq +(keep_always_whitespace_mode)p Fv(\).)479 2596 y(If)i(the)h(document)d +(is)j(\003agged)e(as)i(stand-alone,)d(these)j(data)f(nodes)f(only)g +(containing)g(whitespace)g(are)h(e)n(v)o(en)479 2704 +y(forbidden)e(if)i(the)h(element)e(declaration)g(is)i(contained)d(in)j +(an)f(e)o(xternal)f(entity)-5 b(.)19 b(This)h(case)h(is)g(detected)f +(and)479 2812 y(rejected.)479 2962 y(If)g(the)h(content)e(model)g(is)i +Fq(EMPTY)p Fv(,)f(it)h(is)g(not)f(allo)n(wed)f(to)i(add)e(an)o(y)h +(data)g(node)f(unless)h(the)g(data)g(node)g(is)h(empty)-5 +b(.)18 b(In)479 3070 y(this)j(case,)f(the)h(ne)n(w)f(data)g(node)f(is)i +(silently)f(dropped.)479 3219 y(These)g(checks)g(only)f(apply)h(if)g 
+(there)g(is)h(a)f(DTD.)h(In)f(well-formedness)e(mode,)h(it)i(is)g +(assumed)e(that)i(e)n(v)o(ery)d(element)479 3327 y(is)j(declared)e +(with)i(content)e(model)g Fq(ANY)h Fv(which)g(prohibits)f(an)o(y)g(v)n +(alidation)g(check.)g(Furthermore,)f(you)h(turn)h(these)479 +3435 y(checks)g(of)n(f)f(by)h(passing)g Fq(~force:true)f +Fv(as)i(\002rst)g(ar)o(gument.)p Black 396 3584 a Ft(\225)p +Black 60 w Fq(add_pinstr)43 b(pi)p Fv(:)21 b(Adds)f(the)g(processing)f +(instruction)g Fq(pi)h Fv(to)h(the)f(list)h(of)f(processing)f +(instructions.)p Black 396 3692 a Ft(\225)p Black 60 +w Fq(delete)p Fv(:)h(Deletes)h(this)g(node)e(from)g(the)h(tree.)g +(After)g(this)h(operation,)d(this)i(node)g(is)h(no)f(longer)e(the)j +(child)e(of)h(the)479 3800 y(former)f(f)o(ather)g(node;)f(and)i(the)g +(node)e(loses)j(the)e(connection)f(to)i(the)g(f)o(ather)f(as)h(well.)h +(This)e(operation)f(is)j(illustrated)479 3908 y(by)f(the)g(\002gure)g +Fr(A)g(deleted)g(node)f(becomes)g(the)i(r)l(oot)f(of)g(the)h(subtr)m +(ee)p Fv(.)p Black 396 4016 a Ft(\225)p Black 60 w Fq(set_nodes)44 +b(nl)p Fv(:)20 b(Sets)h(the)f(list)i(of)e(children)e(to)j +Fq(nl)p Fv(.)f(It)g(is)i(required)c(that)i(e)n(v)o(ery)f(member)g(of)h +Fq(nl)g Fv(is)h(a)g(root,)e(and)479 4124 y(that)i(all)f(members)f(and)h +(the)g(current)f(object)h(share)g(the)g(same)g(DTD.)g(Unlik)o(e)g +Fq(add_node)p Fv(,)g(no)f(v)n(alidation)g(checks)479 +4232 y(are)h(performed.)p Black 396 4340 a Ft(\225)p +Black 60 w Fq(quick_set_attributes)42 b(atts)p Fv(:)20 +b(sets)h(the)f(attrib)n(utes)h(of)e(this)i(element)f(to)g +Fq(atts)p Fv(.)g(It)g(is)i Fr(not)f Fv(check)o(ed)479 +4448 y(whether)e Fq(atts)i Fv(matches)e(the)i(DTD)f(or)g(not;)g(it)h +(is)g(up)f(to)g(the)g(caller)g(of)g(this)h(method)e(to)h(ensure)g +(this.)g(\(This)479 4556 y(method)f(may)h(be)g(useful)g(to)g(transform) +e(the)j(attrib)n(ute)f(v)n(alues,)f(i.e.)h(apply)f(a)i(mapping)d(to)j +(e)n(v)o(ery)e(attrib)n(ute.\))p Black 396 4664 a Ft(\225)p +Black 60 w Fq(set_comment)43 b(text)p 
Fv(:)20 b(This)h(method)e(is)i +(only)e(applicable)g(to)h Fq(T_comment)g Fv(nodes;)f(it)i(sets)g(the)g +(comment)d(te)o(xt)479 4772 y(contained)h(by)h(such)g(nodes.)p +Black 3800 5278 a Fr(55)p Black eop +%%Page: 56 56 +56 55 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(Cloning)g(methods)h(.) +p Black 396 811 a Ft(\225)p Black 60 w Fq(orphaned_clone)p +Fv(:)e(Returns)h(a)g(clone)g(of)g(the)g(node)f(and)h(the)g(complete)f +(tree)h(belo)n(w)g(this)h(node)e(\(deep)g(clone\).)479 +919 y(The)h(clone)g(does)g(not)g(ha)n(v)o(e)f(a)i(parent)e(\(i.e.)h +(the)g(reference)f(to)h(the)g(parent)f(node)g(is)j Fr(not)f +Fv(cloned\).)d(While)j(cop)o(ying)479 1027 y(the)f(subtree,)g(strings)g +(are)g(skipped;)f(it)i(is)g(lik)o(ely)f(that)h(the)f(original)f(tree)h +(and)g(the)g(cop)o(y)f(tree)h(share)g(strings.)479 1135 +y(Extension)f(objects)h(are)g(cloned)f(by)h(in)m(v)n(oking)e(the)i +Fq(clone)g Fv(method)f(on)h(the)g(original)f(objects;)h(ho)n(w)g(much)f +(of)h(the)479 1243 y(e)o(xtension)f(objects)h(is)h(cloned)e(depends)g +(on)h(the)g(implemention)e(of)i(this)h(method.)479 1393 +y(This)g(operation)d(is)j(illustrated)f(by)g(the)g(\002gure)f +Fr(The)i(clone)e(of)i(a)f(subtr)m(ee)p Fv(.)p Black 396 +1542 a Ft(\225)p Black 60 w Fq(orphaned_flat_clone)p +Fv(:)e(Returns)i(a)h(clone)e(of)h(the)g(node,)f(b)n(ut)h(sets)i(the)e +(list)h(of)f(sub)g(nodes)g(to)g([],)g(i.e.)g(the)g(sub)479 +1650 y(nodes)g(are)g(not)g(cloned.)p Black 396 1758 a +Ft(\225)p Black 81 w Fq(create_element)42 b(dtd)i(nt)h(al)p +Fv(:)20 b(Returns)f(a)i(\003at)f(cop)o(y)f(of)g(this)i(node)d(\(which)h +(must)h(be)f(an)h(element\))f(with)h(the)479 1866 y(follo)n(wing)f +(modi\002cations:)g(The)h(DTD)g(is)h(set)g(to)f Fq(dtd)p +Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f Fq(nt)p Fv(,)g(and)g(the)g +(ne)n(w)g(attrib)n(ute)g(list)h(is)479 1974 y(set)g(to)f +Fq(al)g Fv(\(gi)n(v)o(en)e(as)i(list)h(of)f(\(name,v)n(alue\))d 
+(pairs\).)i(The)g(cop)o(y)g(does)h(not)f(ha)n(v)o(e)g(children)g(nor)g +(a)h(parent.)f(It)h(does)f(not)479 2082 y(contain)g(processing)g +(instructions.)g(See)i(the)f(e)o(xample)f(belo)n(w.)479 +2231 y(Note)h(that)h(you)e(can)h(specify)g(the)g(position)f(of)h(the)g +(ne)n(w)g(node)f(by)h(the)g(optional)f(ar)o(gument)f +Fq(~position)p Fv(.)p Black 396 2380 a Ft(\225)p Black +81 w Fq(create_data)43 b(dtd)h(cdata)p Fv(:)20 b(Returns)g(a)h(\003at)g +(cop)o(y)e(of)h(this)h(node)e(\(which)g(must)h(be)h(a)f(data)g(node\))f +(with)h(the)479 2488 y(follo)n(wing)f(modi\002cations:)g(The)h(DTD)g +(is)h(set)g(to)f Fq(dtd)p Fv(;)h(the)f(node)f(type)h(is)h(set)g(to)f +Fq(T_data)p Fv(;)g(the)g(attrib)n(ute)g(list)h(is)479 +2596 y(empty)f(\(data)f(nodes)h(ne)n(v)o(er)f(ha)n(v)o(e)g(attrib)n +(utes\);)h(the)g(list)h(of)f(children)f(and)h(PIs)h(is)g(empty)-5 +b(,)19 b(too)g(\(same)h(reason\).)f(The)479 2704 y(ne)n(w)h(node)f +(does)h(not)g(ha)n(v)o(e)g(a)g(parent.)f(The)h(v)n(alue)g +Fq(cdata)g Fv(is)h(the)f(ne)n(w)g(character)f(content)g(of)h(the)g +(node.)f(See)i(the)479 2812 y(e)o(xample)e(belo)n(w.)p +Black 396 2920 a Ft(\225)p Black 60 w Fq(keep_always_whitespace_mode)p +Fv(:)e(Ev)o(en)i(data)h(nodes)f(which)h(are)g(normally)f(dropped)e +(because)j(the)o(y)f(only)479 3028 y(contain)g(ignorable)f(whitespace,) +h(can)h(added)e(to)i(this)h(node)d(once)h(this)i(mode)e(is)h(turned)f +(on.)g(\(This)h(mode)f(is)h(useful)479 3136 y(to)h(produce)d(canonical) +h(XML.\))396 3327 y Fu(V)-8 b(alidating)20 b(methods)h(.)f +Fv(There)f(is)j(one)d(method)g(which)h(locally)f(v)n(alidates)h(the)g +(node,)f(i.e.)i(checks)e(whether)g(the)396 3435 y(subnodes)g(match)h +(the)g(content)f(model)g(of)h(this)h(node.)p Black 396 +3667 a Ft(\225)p Black 60 w Fq(local_validate)p Fv(:)e(Checks)h(that)g +(this)h(node)e(conforms)f(to)j(the)f(DTD)g(by)g(comparing)e(the)i(type) +g(of)g(the)479 3775 y(subnodes)e(with)i(the)g(content)e(model)h(for)g 
+(this)h(node.)e(\(Applications)g(need)h(not)g(call)h(this)h(method)d +(unless)h(the)o(y)g(add)479 3883 y(ne)n(w)h(nodes)g(themselv)o(es)f(to) +i(the)f(tree.\))-2 4294 y Fp(3.2.3.)35 b(The)f(c)n(lass)h +Fc(element_impl)396 4462 y Fv(This)21 b(class)g(is)g(an)f +(implementation)e(of)i Fq(node)g Fv(which)g(realizes)g(element)g +(nodes:)396 4642 y Fq(class)44 b([)h('ext)f(])h(element_impl)e(:)h +('ext)g(->)h([)g('ext)f(])g(node)396 4875 y Fu(Constructor)-8 +b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by)p +Black 3798 5278 a Fr(56)p Black eop +%%Page: 57 57 +57 56 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fq(new)45 +b(element_impl)d Fn(extension_object)396 770 y Fv(which)20 +b(creates)g(a)h(special)f(form)f(of)h(empty)f(element)h(which)g +(already)f(contains)g(a)i(reference)d(to)j(the)396 878 +y Fl(extension_object)p Fv(,)d(b)n(ut)i(is)h(otherwise)f(empty)-5 +b(.)18 b(This)j(special)f(form)f(is)i(called)f(an)g Fr(e)n(xemplar)r +Fv(.)g(The)g(purpose)f(of)396 986 y(e)o(x)o(emplars)g(is)i(that)f(the)o +(y)g(serv)o(e)f(as)i(patterns)f(that)g(can)g(be)g(duplicated)f(and)g +(\002lled)i(with)f(data.)g(The)g(method)396 1094 y Fq(create_element)f +Fv(is)i(designed)e(to)h(perform)e(this)j(action.)396 +1243 y Fu(Example.)f Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396 +1423 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 1520 +y(let)h(exemplar)222 b(=)45 b(new)f(element_impl)f(exemplar_ext)g(in) +396 1711 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e +(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(element)g +(nodes)f(are)i(created:)396 1891 y Fq(let)45 b(element)e(=)i(exemplar)e +(#)i(create_element)e(dtd)h(\(T_element)f(name\))h(attlist)396 +2082 y Fv(The)20 b Fq(element)g Fv(is)h(a)f(cop)o(y)g(of)g +Fq(exemplar)f Fv(\(e)n(v)o(en)g(the)h(e)o(xtension)f +Fq(exemplar_ext)g Fv(has)h(been)g(copied\))e(which)396 +2190 y(ensures)h(that)h Fq(element)f 
Fv(and)g(its)i(e)o(xtension)d(are) +i(objects)f(of)h(the)f(same)h(class)h(as)f(the)g(e)o(x)o(emplars;)e +(note)h(that)h(you)e(need)396 2298 y(not)i(to)g(pass)h(a)g(class)g +(name)f(or)f(other)h(meta)g(information.)d(The)j(cop)o(y)g(is)h +(initially)f(connected)e(with)j(the)f Fq(dtd)p Fv(,)g(it)h(gets)f(a)396 +2406 y(node)f(type,)h(and)g(the)g(attrib)n(ute)g(list)h(is)g(\002lled.) +f(The)g Fq(element)g Fv(is)h(no)n(w)e(fully)h(functional;)e(it)j(can)f +(be)g(added)f(to)i(another)396 2514 y(element)f(as)h(child,)e(and)h(it) +h(can)f(contain)f(references)g(to)h(subnodes.)-2 2884 +y Fp(3.2.4.)35 b(The)f(c)n(lass)h Fc(data_impl)396 3051 +y Fv(This)21 b(class)g(is)g(an)f(implementation)e(of)i +Fq(node)g Fv(which)g(should)f(be)h(used)g(for)f(all)i(character)e(data) +h(nodes:)396 3232 y Fq(class)44 b([)h('ext)f(])h(data_impl)e(:)i('ext)f +(->)g([)h('ext)f(])h(node)396 3464 y Fu(Constructor)-8 +b(.)19 b Fv(Y)-9 b(ou)20 b(can)g(create)f(a)i(ne)n(w)f(instance)g(by) +396 3644 y Fq(new)45 b(data_impl)e Fn(extension_object)396 +3835 y Fv(which)20 b(creates)g(an)g(empty)g(e)o(x)o(emplar)e(node)h +(which)h(is)h(connected)d(to)i Fl(extension_object)p +Fv(.)e(The)i(node)f(does)396 3943 y(not)h(contain)f(a)i(reference)d(to) +j(an)o(y)e(DTD,)h(and)g(because)f(of)h(this)h(it)g(cannot)e(be)h(added) +f(to)i(node)e(trees.)396 4093 y(T)-7 b(o)21 b(get)f(a)g(fully)g(w)o +(orking)f(data)h(node,)f(apply)g(the)h(method)f Fq(create_data)g +Fv(to)h(the)g(e)o(x)o(emplar)f(\(see)h(e)o(xample\).)396 +4242 y Fu(Example.)g Fv(First,)h(create)f(an)g(e)o(x)o(emplar)e(by)396 +4422 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(in)396 4519 +y(let)h(exemplar)222 b(=)45 b(new)f(exemplar_ext)f(data_impl)h(in)396 +4710 y Fv(The)20 b Fq(exemplar)g Fv(is)h(not)f(used)f(in)i(node)e +(trees,)h(b)n(ut)g(only)g(as)h(a)f(pattern)g(when)f(the)h(data)g(nodes) +g(are)g(created:)p Black 3797 5278 a Fr(57)p Black eop +%%Page: 58 58 +58 57 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) 
+m(esenting)g(the)g(document)p Black 396 579 a Fq(let)45 +b(data_node)e(=)i(exemplar)e(#)i(create_data)e(dtd)h("The)g(characters) +f(con-)396 676 y(tained)h(in)h(the)f(data)g(node")396 +867 y Fv(The)20 b Fq(data_node)f Fv(is)i(a)g(cop)o(y)e(of)h +Fq(exemplar)p Fv(.)g(The)f(cop)o(y)h(is)h(initially)f(connected)e(with) +j(the)f Fq(dtd)p Fv(,)g(and)f(it)i(is)h(\002lled)396 +975 y(with)f(character)e(material.)g(The)h Fq(data_node)f +Fv(is)i(no)n(w)f(fully)g(functional;)e(it)j(can)f(be)g(added)f(to)h(an) +h(element)e(as)i(child.)-2 1345 y Fp(3.2.5.)35 b(The)f(type)g +Fc(spec)396 1512 y Fv(The)20 b(type)g Fq(spec)g Fv(de\002nes)g(a)g(w)o +(ay)h(to)f(handle)f(the)h(details)h(of)f(creating)f(nodes)g(from)h(e)o +(x)o(emplars.)396 1692 y Fq(type)44 b('ext)h(spec)396 +1790 y(constraint)e('ext)i(=)f('ext)g(node)h(#extension)396 +1984 y(val)g(make_spec_from_mapping)c(:)665 2081 y +(?super_root_exemplar)h(:)i('ext)h(node)f(->)665 2178 +y(?comment_exemplar)e(:)j('ext)f(node)g(->)665 2275 y +(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665 +2372 y(?pinstr_mapping)f(:)h(\(string,)g('ext)g(node\))g(Hashtbl.t)f +(->)665 2469 y(data_exemplar:)g('ext)h(node)g(->)665 +2567 y(default_element_exemplar:)d('ext)j(node)g(->)665 +2664 y(element_mapping:)e(\(string,)i('ext)g(node\))g(Hashtbl.t)f(->) +665 2761 y(unit)h(->)755 2858 y('ext)g(spec)396 3052 +y(val)h(make_spec_from_alist)c(:)665 3149 y(?super_root_exemplar)h(:)i +('ext)h(node)f(->)665 3247 y(?comment_exemplar)e(:)j('ext)f(node)g(->) +665 3344 y(?default_pinstr_exemplar)d(:)k('ext)f(node)g(->)665 +3441 y(?pinstr_alist)f(:)i(\(string)e(*)i('ext)f(node\))g(list)g(->)665 +3538 y(data_exemplar:)f('ext)h(node)g(->)665 3635 y +(default_element_exemplar:)d('ext)j(node)g(->)665 3732 +y(element_alist:)f(\(string)g(*)i('ext)f(node\))g(list)g(->)665 +3829 y(unit)g(->)755 3927 y('ext)g(spec)396 4117 y Fv(The)20 +b(tw)o(o)h(functions)d Fq(make_spec_from_mapping)f Fv(and)j +Fq(make_spec_from_alist)d Fv(create)j Fq(spec)g Fv(v)n(alues.)396 
+4225 y(Both)g(functions)f(are)h(functionally)e(equi)n(v)n(alent)h(and)g +(the)i(only)e(dif)n(ference)f(is)j(that)g(the)f(\002rst)h(function)d +(prefers)396 4333 y(hashtables)i(and)g(the)g(latter)g(associati)n(v)o +(e)g(lists)h(to)g(describe)e(mappings)g(from)g(names)h(to)g(e)o(x)o +(emplars.)396 4483 y(Y)-9 b(ou)20 b(can)g(specify)f(e)o(x)o(emplars)g +(for)g(the)i(v)n(arious)e(kinds)g(of)h(nodes)g(that)g(need)g(to)g(be)g +(generated)e(when)i(an)g(XML)396 4591 y(document)e(is)k(parsed:)p +Black 3800 5278 a Fr(58)p Black eop +%%Page: 59 59 +59 58 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black Black 396 579 a Ft(\225)p +Black 60 w Fq(~super_root_exemplar)p Fv(:)e(This)i(e)o(x)o(emplar)e(is) +j(used)f(to)h(create)f(the)g(super)f(root.)h(This)g(special)g(node)g +(is)h(only)479 687 y(created)f(if)g(the)g(corresponding)d +(con\002guration)h(option)h(has)h(been)g(selected;)g(it)h(is)g(the)f +(parent)f(node)g(of)h(the)h(root)479 795 y(node)e(which)h(may)g(be)g +(con)m(v)o(enient)d(if)k(e)n(v)o(ery)e(w)o(orking)f(node)i(must)g(ha)n +(v)o(e)f(a)i(parent.)p Black 396 903 a Ft(\225)p Black +60 w Fq(~comment_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar)d(is)j(used)f +(when)f(a)i(comment)e(node)g(must)h(be)g(created.)g(Note)g(that)g(such) +479 1011 y(nodes)g(are)g(only)f(created)h(if)g(the)g(corresponding)d +(con\002guration)h(option)h(is)i("on".)p Black 396 1119 +a Ft(\225)p Black 60 w Fq(~default_pinstr_exemplar)p +Fv(:)c(If)j(a)h(node)e(for)g(a)i(processing)e(instruction)g(must)h(be)g +(created,)f(and)h(the)479 1226 y(instruction)f(is)i(not)f(listed)h(in)f +(the)g(table)h(passed)f(by)f Fq(~pinstr_mapping)g Fv(or)h +Fq(~pinstr_alist)p Fv(,)e(this)j(e)o(x)o(emplar)479 1334 +y(is)g(used.)f(Again)f(the)i(con\002guration)c(option)i(must)h(be)g +("on")g(in)g(order)f(to)i(create)e(such)h(nodes)g(at)h(all.)p +Black 396 1442 a Ft(\225)p Black 60 w Fq(~pinstr_mapping)e +Fv(or)g Fq(~pinstr_alist)p 
Fv(:)g(Map)h(the)g(tar)o(get)g(names)f(of)h +(processing)f(instructions)g(to)479 1550 y(e)o(x)o(emplars.)g(These)h +(mappings)e(are)i(only)g(used)g(when)f(nodes)h(for)f(processing)g +(instructions)g(are)h(created.)p Black 396 1658 a Ft(\225)p +Black 60 w Fq(~data_exemplar)p Fv(:)f(The)h(e)o(x)o(emplar)e(for)h +(ordinary)f(data)i(nodes.)p Black 396 1766 a Ft(\225)p +Black 60 w Fq(~default_element_exemplar)p Fv(:)d(This)j(e)o(x)o(emplar) +e(is)k(used)e(if)g(an)g(element)g(node)f(must)h(be)g(created,)f(b)n(ut) +i(the)479 1874 y(element)f(type)g(cannot)f(be)h(found)e(in)j(the)f +(tables)g Fq(element_mapping)e Fv(or)i Fq(element_alist)p +Fv(.)p Black 396 1982 a Ft(\225)p Black 60 w Fq(~element_mapping)e +Fv(or)i Fq(~element_alist)p Fv(:)f(Map)h(the)g(element)f(types)h(to)h +(e)o(x)o(emplars.)d(These)i(mappings)f(are)479 2090 y(used)h(to)h +(create)e(element)h(nodes.)396 2239 y(In)g(most)g(cases,)h(you)e(only)h +(w)o(ant)g(to)g(create)g Fq(spec)g Fv(v)n(alues)g(to)h(pass)f(them)g +(to)g(the)h(parser)e(functions)g(found)f(in)396 2347 +y Fq(Pxp_yacc)p Fv(.)h(Ho)n(we)n(v)o(er)m(,)f(it)j(might)f(be)g(useful) +g(to)g(apply)f Fq(spec)h Fv(v)n(alues)g(directly)-5 b(.)396 +2497 y(The)20 b(follo)n(wing)f(functions)f(create)i(v)n(arious)f(types) +h(of)g(nodes)g(by)g(selecting)f(the)i(corresponding)16 +b(e)o(x)o(emplar)j(from)g(the)396 2605 y(passed)h Fq(spec)g +Fv(v)n(alue,)g(and)f(by)h(calling)g Fq(create_element)e +Fv(or)i Fq(create_data)f Fv(on)h(the)g(e)o(x)o(emplar)-5 +b(.)396 2785 y Fq(val)45 b(create_data_node)d(:)665 2882 +y('ext)i(spec)h(->)665 2979 y(dtd)g(->)665 3076 y(\(*)g(data)f +(material:)f(*\))i(string)f(->)845 3173 y('ext)g(node)396 +3368 y(val)h(create_element_node)c(:)665 3465 y(?position:\(string)h(*) +j(int)f(*)h(int\))f(->)665 3562 y('ext)g(spec)h(->)665 +3659 y(dtd)g(->)665 3756 y(\(*)g(element)e(type:)h(*\))h(string)f(->) +665 3853 y(\(*)h(attributes:)e(*\))h(\(string)g(*)h(string\))e(list)h +(->)845 3950 y('ext)g(node)396 4145 
y(val)h(create_super_root_node)c(:) +665 4242 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665 +4339 y('ext)g(spec)h(->)710 4436 y(dtd)f(->)889 4533 +y('ext)h(node)396 4728 y(val)g(create_comment_node)c(:)665 +4825 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)p +Black 3800 5278 a Fr(59)p Black eop +%%Page: 60 60 +60 59 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 665 579 a Fq('ext)44 +b(spec)h(->)665 676 y(dtd)g(->)665 773 y(\(*)g(comment)e(text:)h(*\))h +(string)f(->)845 870 y('ext)g(node)396 1065 y(val)h(create_pinstr_node) +c(:)665 1162 y(?position:\(string)h(*)j(int)f(*)h(int\))f(->)665 +1259 y('ext)g(spec)h(->)665 1356 y(dtd)g(->)665 1453 +y(proc_instruction)d(->)845 1550 y('ext)i(node)-2 2003 +y Fp(3.2.6.)35 b(Examples)396 2171 y Fu(Building)22 b(tr)o(ees.)d +Fv(Here)h(is)h(the)g(piece)e(of)h(code)g(that)g(creates)g(the)h(tree)f +(of)g(the)g(\002gure)f Fr(A)i(tr)m(ee)g(with)f(element)g(nodes,)396 +2279 y(data)g(nodes,)f(and)g(attrib)n(utes)p Fv(.)h(The)g(e)o(xtension) +f(object)h(and)f(the)h(DTD)h(are)f(be)o(yond)e(the)i(scope)g(of)g(this) +g(e)o(xample.)396 2459 y Fq(let)45 b(exemplar_ext)d(=)j(...)f(\(*)h +(some)f(extension)f(*\))i(in)396 2556 y(let)g(dtd)f(=)h(...)f(\(*)g +(some)h(DTD)f(*\))g(in)396 2750 y(let)h(element_exemplar)d(=)i(new)h +(element_impl)e(exemplar_ext)f(in)396 2847 y(let)j(data_exemplar)177 +b(=)44 b(new)h(data_impl)178 b(exemplar_ext)42 b(in)396 +3042 y(let)j(a1)f(=)h(element_exemplar)d(#)j(cre-)396 +3139 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("apple"])396 +3236 y(and)h(b1)f(=)h(element_exemplar)d(#)j(create_element)d(dtd)i +(\(T_element)g("b"\))g([])396 3333 y(and)h(c1)f(=)h(element_exemplar)d +(#)j(create_element)d(dtd)i(\(T_element)g("c"\))g([])396 +3430 y(and)h(a2)f(=)h(element_exemplar)d(#)j(cre-)396 +3527 y(ate_element)e(dtd)i(\(T_element)e("a"\))h(["att",)g("orange"]) +396 3624 y(in)396 3819 y(let)h(cherries)e(=)i(data_exemplar)d(#)j 
+(create_data)e(dtd)h("Cherries")g(in)396 3916 y(let)h(orange)133 +b(=)45 b(data_exemplar)d(#)j(create_data)e(dtd)h("An)h(orange")e(in)396 +4110 y(a1)i(#)f(add_node)g(b1;)396 4207 y(a1)h(#)f(add_node)g(c1;)396 +4304 y(b1)h(#)f(add_node)g(a2;)396 4401 y(b1)h(#)f(add_node)g +(cherries;)396 4499 y(a2)h(#)f(add_node)g(orange;)396 +4689 y Fv(Alternati)n(v)o(ely)-5 b(,)18 b(the)i(last)h(block)f(of)g +(statements)g(could)f(also)i(be)f(written)g(as:)396 4870 +y Fq(a1)45 b(#)f(set_nodes)g([b1;)g(c1];)p Black 3800 +5278 a Fr(60)p Black eop +%%Page: 61 61 +61 60 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fq(b1)45 +b(#)f(set_nodes)g([a2;)g(cherries];)396 676 y(a2)h(#)f(set_nodes)g +([orange];)396 867 y Fv(The)20 b(root)g(of)g(the)g(tree)g(is)h +Fq(a1)p Fv(,)f(i.e.)g(it)h(is)g(true)f(that)396 1047 +y Fq(x)45 b(#)g(root)f(==)g(a1)396 1238 y Fv(for)20 b(e)n(v)o(ery)f(x)h +(from)f({)i Fq(a1)p Fv(,)f Fq(a2)p Fv(,)g Fq(b1)p Fv(,)g +Fq(c1)p Fv(,)g Fq(cherries)p Fv(,)g Fq(orange)f Fv(}.)396 +1388 y(Furthermore,)f(the)i(follo)n(wing)f(properties)f(hold:)486 +1568 y Fq(a1)44 b(#)h(attribute)e("att")h(=)h(Value)f("apple")396 +1665 y(&)h(a2)f(#)h(attribute)e("att")h(=)h(Value)f("orange")396 +1859 y(&)h(cherries)e(#)i(data)f(=)h("Cherries")396 1956 +y(&)135 b(orange)43 b(#)i(data)f(=)h("An)f(orange")396 +2053 y(&)314 b(a1)44 b(#)h(data)f(=)h("CherriesAn)e(orange")396 +2248 y(&)314 b(a1)44 b(#)h(node_type)e(=)i(T_element)e("a")396 +2345 y(&)314 b(a2)44 b(#)h(node_type)e(=)i(T_element)e("a")396 +2442 y(&)314 b(b1)44 b(#)h(node_type)e(=)i(T_element)e("b")396 +2539 y(&)314 b(c1)44 b(#)h(node_type)e(=)i(T_element)e("c")396 +2636 y(&)i(cherries)e(#)i(node_type)e(=)i(T_data)396 +2733 y(&)135 b(orange)43 b(#)i(node_type)e(=)i(T_data)396 +2928 y(&)314 b(a1)44 b(#)h(sub_nodes)e(=)i([)g(b1;)f(c1)h(])396 +3025 y(&)314 b(a2)44 b(#)h(sub_nodes)e(=)i([)g(orange)f(])396 +3122 y(&)314 b(b1)44 
b(#)h(sub_nodes)e(=)i([)g(a2;)f(cherries)g(])396 +3219 y(&)314 b(c1)44 b(#)h(sub_nodes)e(=)i([])396 3316 +y(&)g(cherries)e(#)i(sub_nodes)e(=)i([])396 3413 y(&)135 +b(orange)43 b(#)i(sub_nodes)e(=)i([])396 3608 y(&)314 +b(a2)44 b(#)h(parent)f(==)g(a1)396 3705 y(&)314 b(b1)44 +b(#)h(parent)f(==)g(b1)396 3802 y(&)314 b(c1)44 b(#)h(parent)f(==)g(a1) +396 3899 y(&)h(cherries)e(#)i(parent)f(==)g(b1)396 3996 +y(&)135 b(orange)43 b(#)i(parent)f(==)g(a2)396 4229 y +Fu(Sear)o(ching)19 b(nodes.)g Fv(The)g(follo)n(wing)e(function)h +(searches)h(all)g(nodes)g(of)g(a)g(tree)h(for)e(which)h(a)g(certain)g +(condition)e(holds:)396 4409 y Fq(let)45 b(rec)f(search)g(p)g(t)h(=)486 +4506 y(if)f(p)h(t)g(then)576 4603 y(t)f(::)h(search_list)e(p)h(\(t)h(#) +g(sub_nodes\))486 4700 y(else)576 4797 y(search_list)e(p)h(\(t)h(#)f +(sub_nodes\))p Black 3800 5278 a Fr(61)p Black eop +%%Page: 62 62 +62 61 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 676 a Fq(and)45 +b(search_list)e(p)h(l)h(=)486 773 y(match)f(l)h(with)576 +870 y([])268 b(-)p Fo(>)45 b Fq([])486 967 y(|)g(t)f(::)h(l')f(-)p +Fo(>)h Fq(\(search)e(p)i(t\))f(@)h(\(search_list)e(p)i(l'\))396 +1065 y(;;)396 1297 y Fv(F)o(or)20 b(e)o(xample,)f(if)h(you)f(w)o(ant)i +(to)f(search)g(all)h(elements)f(of)f(a)i(certain)f(type)f +Fq(et)p Fv(,)i(the)f(function)e Fq(search)i Fv(can)g(be)g(applied)396 +1405 y(as)h(follo)n(ws:)396 1585 y Fq(let)45 b(search_element_type)c +(et)k(t)f(=)486 1682 y(search)g(\(fun)g(x)h(-)p Fo(>)f +Fq(x)h(#)f(node_type)g(=)g(T_element)g(et\))g(t)396 1779 +y(;;)396 2012 y Fu(Getting)20 b(attrib)n(ute)f(v)o(alues.)h +Fv(Suppose)f(we)i(ha)n(v)o(e)f(the)g(declaration:)396 +2192 y Fq()396 +2577 y Fv(In)20 b(this)h(case,)f(e)n(v)o(ery)f(element)h +Fq(e)g Fv(must)h(ha)n(v)o(e)e(an)h(attrib)n(ute)g Fq(a)p +Fv(,)g(otherwise)g(the)g(parser)g(w)o(ould)f(indicate)h(an)g(error)-5 +b(.)19 b(If)h(the)396 2685 y(O'Caml)h(v)n(ariable)e Fq(n)h 
+Fv(holds)g(the)g(node)f(of)h(the)g(tree)h(corresponding)16 +b(to)21 b(the)f(element,)f(you)g(can)h(get)h(the)f(v)n(alue)f(of)h(the) +396 2793 y(attrib)n(ute)g Fq(a)h Fv(by)396 2973 y Fq(let)45 +b(value_of_a)e(=)h(n)h(#)g(required_string_attribute)40 +b("a")396 3164 y Fv(which)20 b(is)h(more)e(or)h(less)i(an)e(abbre)n +(viation)d(for)396 3344 y Fq(let)45 b(value_of_a)e(=)486 +3442 y(match)h(n)h(#)f(attribute)g("a")g(with)576 3539 +y(Value)g(s)g(->)h(s)486 3636 y(|)g(_)313 b(->)45 b(assert)f(false)396 +3827 y Fv(-)21 b(as)g(the)f(attrib)n(ute)g(is)h(required,)d(the)i +Fq(attribute)f Fv(method)g(al)o(w)o(ays)i(returns)e(a)i +Fq(Value)p Fv(.)396 3976 y(In)f(contrast)g(to)g(this,)h(the)f(attrib)n +(ute)g Fq(b)g Fv(can)g(be)g(omitted.)g(In)f(this)i(case,)g(the)f +(method)396 4084 y Fq(required_string_attribute)d Fv(w)o(orks)j(only)f +(if)h(the)h(attrib)n(ute)f(is)h(there,)e(and)h(the)g(method)f(will)i(f) +o(ail)f(if)h(the)396 4192 y(attrib)n(ute)f(is)h(missing.)f(T)-7 +b(o)20 b(get)h(the)f(v)n(alue,)f(you)g(can)h(apply)g(the)g(method)f +Fq(optional_string_attribute)p Fv(:)396 4372 y Fq(let)45 +b(value_of_b)e(=)h(n)h(#)g(optional_string_attribute)40 +b("b")396 4563 y Fv(No)n(w)-5 b(,)20 b Fq(value_of_b)f +Fv(is)i(of)f(type)g Fq(string)43 b(option)p Fv(,)20 b(and)f +Fq(None)i Fv(represents)e(the)h(omitted)g(attrib)n(ute.)f(Alternati)n +(v)o(ely)-5 b(,)396 4671 y(you)20 b(could)f(also)h(use)h +Fq(attribute)p Fv(:)396 4851 y Fq(let)45 b(value_of_b)e(=)p +Black 3800 5278 a Fr(62)p Black eop +%%Page: 63 63 +63 62 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 486 579 a Fq(match)44 +b(n)h(#)f(attribute)g("b")g(with)576 676 y(Value)g(s)313 +b(->)45 b(Some)f(s)486 773 y(|)h(Implied_value)d(->)j(None)486 +870 y(|)g(_)582 b(->)45 b(assert)f(false)396 1103 y Fv(The)20 +b(attrib)n(ute)g Fq(c)h Fv(beha)n(v)o(es)e(much)g(lik)o(e)h +Fq(a)p Fv(,)h(because)e(it)i(has)g(al)o(w)o(ays)f(a)h(v)n(alue.)e(If)h 
+(the)g(attrib)n(ute)g(is)h(omitted,)f(the)g(def)o(ault,)396 +1211 y(here)g("12345",)e(will)j(be)f(returned)e(instead.)i(Because)g +(of)g(this,)h(you)e(can)h(again)f(use)396 1319 y Fq +(required_string_attribute)e Fv(to)j(get)g(the)h(v)n(alue.)396 +1468 y(The)f(type)g Fq(CDATA)g Fv(is)h(the)f(most)g(general)f(string)h +(type.)g(The)g(types)g Fq(NMTOKEN)p Fv(,)f Fq(ID)p Fv(,)h +Fq(IDREF)p Fv(,)g Fq(ENTITY)p Fv(,)f(and)h(all)396 1576 +y(enumerators)e(and)i(notations)f(are)h(special)h(forms)e(of)h(string)g +(types)g(that)g(restrict)g(the)h(possible)f(v)n(alues.)f(From)396 +1684 y(O'Caml,)h(the)o(y)g(beha)n(v)o(e)f(lik)o(e)h Fq(CDATA)p +Fv(,)g(i.e.)g(you)f(can)h(use)h(the)f(methods)f Fq +(required_string_attribute)e Fv(and)396 1792 y Fq +(optional_string_attribute)p Fv(,)g(too.)396 1941 y(In)j(contrast)g(to) +g(this,)h(the)f(types)g Fq(NMTOKENS)p Fv(,)f Fq(IDREFS)p +Fv(,)g(and)h Fq(ENTITIES)g Fv(mean)f(lists)j(of)e(strings.)g(Suppose)f +(we)h(ha)n(v)o(e)396 2049 y(the)g(declaration:)396 2229 +y Fq()396 2517 y Fv(The)20 +b(type)g Fq(NMTOKENS)f Fv(stands)i(for)e(lists)j(of)e(space-separated)e +(tok)o(ens;)i(for)f(e)o(xample)g(the)h(v)n(alue)g Fq("1)44 +b(abc)h(23ef")396 2625 y Fv(means)20 b(the)g(list)i Fq(["1";)44 +b("abc";)f("23ef"])p Fv(.)20 b(\(Again,)e Fq(IDREFS)i +Fv(and)g Fq(ENTITIES)f Fv(ha)n(v)o(e)h(more)f(restricted)h(v)n +(alues.\))396 2733 y(T)-7 b(o)21 b(get)f(the)g(v)n(alue)g(of)f(attrib)n +(ute)h Fq(d)p Fv(,)h(one)e(can)h(use)396 2913 y Fq(let)45 +b(value_of_d)e(=)h(n)h(#)g(required_list_attribute)c("d")396 +3104 y Fv(or)396 3285 y Fq(let)k(value_of_d)e(=)486 3382 +y(match)h(n)h(#)f(attribute)g("d")g(with)576 3479 y(Valuelist)f(l)i(->) +f(l)486 3576 y(|)h(_)493 b(->)44 b(assert)g(false)396 +3767 y Fv(As)21 b Fq(d)g Fv(is)g(required,)d(the)i(attrib)n(ute)g +(cannot)f(be)h(omitted,)g(and)f(the)h Fq(attribute)g +Fv(method)e(returns)i(al)o(w)o(ays)g(a)396 3875 y Fq(Valuelist)p +Fv(.)396 4024 y(F)o(or)g(optional)f(attrib)n(utes)h(lik)o(e)h +Fq(e)p 
Fv(,)f(apply)396 4204 y Fq(let)45 b(value_of_e)e(=)h(n)h(#)g +(optional_list_attribute)c("e")396 4395 y Fv(or)396 4576 +y Fq(let)k(value_of_e)e(=)486 4673 y(match)h(n)h(#)f(attribute)g("e")g +(with)576 4770 y(Valuelist)f(l)134 b(->)45 b(l)486 4867 +y(|)g(Implied_value)d(->)j([])p Black 3800 5278 a Fr(63)p +Black eop +%%Page: 64 64 +64 63 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 486 579 a Fq(|)45 +b(_)582 b(->)45 b(assert)f(false)396 770 y Fv(Here,)20 +b(the)g(case)h(that)f(the)g(attrib)n(ute)g(is)h(missing)f(counts)g(lik) +o(e)g(the)h(empty)e(list.)-2 1139 y Fp(3.2.7.)35 b(Iterator)n(s)396 +1307 y Fv(There)20 b(are)g(also)g(se)n(v)o(eral)g(iterators)g(in)g +(Pxp_document;)d(please)j(see)h(the)f(mli)h(\002le)f(for)g(details.)g +(Y)-9 b(ou)20 b(can)g(\002nd)396 1415 y(e)o(xamples)f(for)h(them)g(in)g +(the)g("simple_transformation")d(directory)-5 b(.)396 +1595 y Fq(val)45 b(find)f(:)g(?deeply:bool)f(->)889 1692 +y(f:\('ext)h(node)g(->)h(bool\))f(->)g('ext)g(node)h(->)f('ext)g(node) +396 1887 y(val)h(find_all)e(:)i(?deeply:bool)e(->)1069 +1984 y(f:\('ext)g(node)i(->)f(bool\))g(->)h('ext)f(node)g(->)g('ext)h +(node)f(list)396 2178 y(val)h(find_element)d(:)j(?deeply:bool)e(->)1248 +2275 y(string)h(->)g('ext)h(node)f(->)g('ext)g(node)396 +2469 y(val)h(find_all_elements)d(:)i(?deeply:bool)f(->)1472 +2567 y(string)h(->)h('ext)f(node)g(->)g('ext)h(node)f(list)396 +2761 y(exception)g(Skip)396 2858 y(val)h(map_tree)e(:)90 +b(pre:\('exta)43 b(node)h(->)g('extb)g(node\))g(->)1069 +2955 y(?post:\('extb)f(node)h(->)g('extb)g(node\))g(->)1069 +3052 y('exta)g(node)g(->)1248 3149 y('extb)g(node)396 +3441 y(val)h(map_tree_sibl)d(:)755 3538 y(pre:)i(\('exta)g(node)g +(option)g(->)g('exta)g(node)h(->)f('exta)g(node)g(option)g(->)1203 +3635 y('extb)g(node\))g(->)710 3732 y(?post:\('extb)f(node)h(option)g +(->)g('extb)g(node)h(->)f('extb)g(node)g(option)g(->)1203 +3829 y('extb)g(node\))g(->)710 3927 y('exta)g(node)g(->)889 
+4024 y('extb)g(node)396 4218 y(val)h(iter_tree)e(:)i(?pre:\('ext)e +(node)h(->)g(unit\))g(->)1114 4315 y(?post:\('ext)f(node)h(->)g(unit\)) +g(->)1114 4412 y('ext)g(node)g(->)1293 4509 y(unit)396 +4704 y(val)h(iter_tree_sibl)d(:)710 4801 y(?pre:)i(\('ext)g(node)g +(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g(unit\))g +(->)p Black 3800 5278 a Fr(64)p Black eop +%%Page: 65 65 +65 64 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 710 579 a Fq(?post:\('ext)43 +b(node)h(option)g(->)h('ext)f(node)g(->)g('ext)h(node)f(option)g(->)g +(unit\))g(->)710 676 y('ext)g(node)g(->)889 773 y(unit)-2 +1358 y Fx(3.3.)39 b(The)g(c)m(lass)g(type)g Fb(extension)396 +1610 y Fq(class)44 b(type)g([)h('node)f(])h(extension)e(=)486 +1707 y(object)h(\('self\))576 1804 y(method)f(clone)h(:)h('self)665 +1901 y(\(*)g("clone")e(should)h(return)g(an)h(exact)f(deep)g(copy)g(of) +g(the)h(object.)e(*\))576 1998 y(method)g(node)i(:)f('node)665 +2095 y(\(*)h("node")f(returns)f(the)i(corresponding)d(node)i(of)h(this) +f(extension.)f(This)h(method)710 2193 y(*)h(intended)e(to)i(return)f +(exactly)f(what)h(previ-)396 2290 y(ously)g(has)h(been)f(set)g(by)h +("set_node".)710 2387 y(*\))576 2484 y(method)e(set_node)h(:)h('node)f +(->)g(unit)665 2581 y(\(*)h("set_node")e(is)h(invoked)g(once)g(the)h +(extension)e(is)h(associated)g(to)g(a)h(new)710 2678 +y(*)g(node)f(object.)710 2775 y(*\))486 2873 y(end)396 +3063 y Fv(This)21 b(is)g(the)f(type)g(of)g(classes)h(used)f(for)f(node) +h(e)o(xtensions.)e(F)o(or)i(e)n(v)o(ery)f(node)g(of)h(the)g(document)e +(tree,)i(there)g(is)h(not)396 3171 y(only)f(the)g Fq(node)g +Fv(object,)f(b)n(ut)h(also)g(an)g Fq(extension)f Fv(object.)h(The)f +(latter)i(has)f(minimal)f(functionality;)f(it)j(has)f(only)g(the)396 +3279 y(necessary)g(methods)f(to)h(be)g(attached)g(to)g(the)g(node)f +(object)h(containing)e(the)j(details)f(of)g(the)g(node)f(instance.)h +(The)396 3387 
y(e)o(xtension)f(object)h(is)h(called)f(e)o(xtension)f +(because)g(its)i(purpose)e(is)i(e)o(xtensibility)-5 b(.)396 +3537 y(F)o(or)20 b(some)g(reasons,)g(it)h(is)g(impossible)e(to)i(deri)n +(v)o(e)d(the)j Fq(node)f Fv(classes)h(\(i.e.)f Fq(element_impl)f +Fv(and)g Fq(data_impl)p Fv(\))g(such)396 3645 y(that)i(the)f +(subclasses)g(can)g(be)g(e)o(xtended)f(by)g(ne)n(w)h(ne)n(w)g(methods.) +f(But)i(subclassing)f(nodes)f(is)i(a)g(great)f(feature,)396 +3753 y(because)g(it)h(allo)n(ws)f(the)g(user)g(to)h(pro)o(vide)d(dif)n +(ferent)g(classes)k(for)d(dif)n(ferent)g(types)h(of)g(nodes.)f(The)h(e) +o(xtension)f(objects)396 3860 y(are)h(a)h(w)o(orkaround)c(that)j(is)i +(as)e(po)n(werful)f(as)i(direct)f(subclassing,)f(the)h(costs)h(are)f +(some)g(notation)f(o)o(v)o(erhead.)p Black 3800 5278 +a Fr(65)p Black eop +%%Page: 66 66 +66 65 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fu(Figur)o(e)g(3-6.)f +(The)i(structur)o(e)f(of)g(nodes)g(and)h(extensions)396 +1928 y + currentpoint currentpoint translate 1 1 scale neg exch neg exch translate + 396 1928 a @beginspecial 0 @llx 0 @lly 206 @urx +140 @ury 2060 @rwi @setspecial +%%BeginDocument: pic/extension_general.ps +%!PS-Adobe-2.0 EPSF-2.0 +%%Title: src/pic/extension_general.fig +%%Creator: fig2dev Version 3.2 Patchlevel 1 +%%CreationDate: Sun Aug 27 02:05:42 2000 +%%For: gerd@ice (Gerd Stolpmann) +%%Orientation: Portrait +%%BoundingBox: 0 0 206 140 +%%Pages: 0 +%%BeginSetup +%%EndSetup +%%Magnification: 0.8000 +%%EndComments +/$F2psDict 200 dict def +$F2psDict begin +$F2psDict /mtrx matrix put +/col-1 {0 setgray} bind def +/col0 {0.000 0.000 0.000 srgb} bind def +/col1 {0.000 0.000 1.000 srgb} bind def +/col2 {0.000 1.000 0.000 srgb} bind def +/col3 {0.000 1.000 1.000 srgb} bind def +/col4 {1.000 0.000 0.000 srgb} bind def +/col5 {1.000 0.000 1.000 srgb} bind def +/col6 {1.000 1.000 0.000 srgb} bind def +/col7 {1.000 1.000 1.000 srgb} bind def +/col8 {0.000 0.000 
0.560 srgb} bind def +/col9 {0.000 0.000 0.690 srgb} bind def +/col10 {0.000 0.000 0.820 srgb} bind def +/col11 {0.530 0.810 1.000 srgb} bind def +/col12 {0.000 0.560 0.000 srgb} bind def +/col13 {0.000 0.690 0.000 srgb} bind def +/col14 {0.000 0.820 0.000 srgb} bind def +/col15 {0.000 0.560 0.560 srgb} bind def +/col16 {0.000 0.690 0.690 srgb} bind def +/col17 {0.000 0.820 0.820 srgb} bind def +/col18 {0.560 0.000 0.000 srgb} bind def +/col19 {0.690 0.000 0.000 srgb} bind def +/col20 {0.820 0.000 0.000 srgb} bind def +/col21 {0.560 0.000 0.560 srgb} bind def +/col22 {0.690 0.000 0.690 srgb} bind def +/col23 {0.820 0.000 0.820 srgb} bind def +/col24 {0.500 0.190 0.000 srgb} bind def +/col25 {0.630 0.250 0.000 srgb} bind def +/col26 {0.750 0.380 0.000 srgb} bind def +/col27 {1.000 0.500 0.500 srgb} bind def +/col28 {1.000 0.630 0.630 srgb} bind def +/col29 {1.000 0.750 0.750 srgb} bind def +/col30 {1.000 0.880 0.880 srgb} bind def +/col31 {1.000 0.840 0.000 srgb} bind def + +end +save +-22.0 205.0 translate +1 -1 scale + +/cp {closepath} bind def +/ef {eofill} bind def +/gr {grestore} bind def +/gs {gsave} bind def +/sa {save} bind def +/rs {restore} bind def +/l {lineto} bind def +/m {moveto} bind def +/rm {rmoveto} bind def +/n {newpath} bind def +/s {stroke} bind def +/sh {show} bind def +/slc {setlinecap} bind def +/slj {setlinejoin} bind def +/slw {setlinewidth} bind def +/srgb {setrgbcolor} bind def +/rot {rotate} bind def +/sc {scale} bind def +/sd {setdash} bind def +/ff {findfont} bind def +/sf {setfont} bind def +/scf {scalefont} bind def +/sw {stringwidth} bind def +/tr {translate} bind def +/tnt {dup dup currentrgbcolor + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add + 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} + bind def +/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul + 4 -2 roll mul srgb} bind def + /DrawEllipse { + /endangle exch def + /startangle exch def + /yrad exch def + /xrad exch def + 
/y exch def + /x exch def + /savematrix mtrx currentmatrix def + x y tr xrad yrad sc 0 0 1 startangle endangle arc + closepath + savematrix setmatrix + } def + +/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def +/$F2psEnd {$F2psEnteredState restore end} def +%%EndProlog + +$F2psBegin +10 setmiterlimit +n -1000 5050 m -1000 -1000 l 5514 -1000 l 5514 5050 l cp clip + 0.05039 0.05039 sc +7.500 slw +% Ellipse +n 1575 2250 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 1575 3375 225 225 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 675 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 2475 3375 229 229 0 360 DrawEllipse gs col7 0.75 shd ef gr gs col0 s gr + +% Ellipse +n 3600 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 2880 2475 180 180 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 4320 2475 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Ellipse +n 3600 1485 186 186 0 360 DrawEllipse gs col7 0.50 shd ef gr gs col0 s gr + +% Polyline +n 675 3150 m 1395 2385 l gs col0 s gr +% Polyline +n 1575 2475 m 1575 3150 l gs col0 s gr +% Polyline +n 1755 2385 m 2475 3150 l gs col0 s gr +% Polyline + [60] 0 sd +gs clippath +3288 1467 m 3412 1462 l 3305 1524 l 3435 1487 l 3418 1429 l cp +clip +n 1537 2010 m 3412 1462 l gs col0 s gr gr + [] 0 sd +% arrowhead +n 3288 1467 m 3412 1462 l 3305 1524 l col0 s +% Polyline + [60] 0 sd +gs clippath +1796 2042 m 1672 2047 l 1779 1984 l 1649 2022 l 1666 2080 l cp +clip +n 3412 1537 m 1672 2047 l gs col0 s gr gr + [] 0 sd +% arrowhead +n 1796 2042 m 1672 2047 l 1779 1984 l col0 s +% Polyline + [60] 0 sd +gs clippath +2584 2524 m 2707 2512 l 2604 2581 l 2731 2535 l 2711 2479 l cp +933 3183 m 810 3195 l 913 3126 l 786 3172 l 806 3228 l cp +clip +n 810 3195 m 2707 2512 l gs col0 s gr gr + [] 0 sd +% arrowhead +n 933 3183 m 810 3195 l 913 3126 l col0 s +% arrowhead +n 
2584 2524 m 2707 2512 l 2604 2581 l col0 s +% Polyline + [60] 0 sd +gs clippath +3319 2594 m 3442 2580 l 3340 2650 l 3467 2603 l 3446 2547 l cp +1863 3203 m 1740 3217 l 1842 3147 l 1715 3194 l 1736 3250 l cp +clip +n 1740 3217 m 3442 2580 l gs col0 s gr gr + [] 0 sd +% arrowhead +n 1863 3203 m 1740 3217 l 1842 3147 l col0 s +% arrowhead +n 3319 2594 m 3442 2580 l 3340 2650 l col0 s +% Polyline + [60] 0 sd +gs clippath +4054 2626 m 4177 2610 l 4076 2682 l 4202 2632 l 4180 2577 l cp +2763 3194 m 2640 3210 l 2741 3138 l 2615 3188 l 2637 3243 l cp +clip +n 2640 3210 m 4177 2610 l gs col0 s gr gr + [] 0 sd +% arrowhead +n 2763 3194 m 2640 3210 l 2741 3138 l col0 s +% arrowhead +n 4054 2626 m 4177 2610 l 4076 2682 l col0 s +/Courier-Bold ff 180.00 scf sf +3555 1530 m +gs 1 -1 sc (x) col0 sh gr +/Courier-Bold ff 180.00 scf sf +1530 2295 m +gs 1 -1 sc (n) col0 sh gr +/Courier ff 180.00 scf sf +1658 1950 m +gs 1 -1 sc 17.0 rot (n # extension) col0 sh gr +/Courier ff 180.00 scf sf +2475 1950 m +gs 1 -1 sc 17.0 rot (x # node) col0 sh gr +/Helvetica ff 180.00 scf sf +1020 4050 m +gs 1 -1 sc (The node tree) col0 sh gr +/Helvetica ff 180.00 scf sf +3225 3285 m +gs 1 -1 sc (The extensions) col0 sh gr +$F2psEnd +rs + +%%EndDocument + @endspecial 396 1928 a + currentpoint currentpoint translate 1 1 div 1 1 div scale neg exch +neg exch translate + 396 1928 a 357 x Fv(The)f(picture)f(sho)n(ws)i +(ho)n(w)e(the)i(nodes)e(and)h(e)o(xtensions)f(are)h(link)o(ed)f +(together)-5 b(.)19 b(Ev)o(ery)g(node)g(has)i(a)f(reference)f(to)h(its) +396 2393 y(e)o(xtension,)f(and)g(e)n(v)o(ery)g(e)o(xtension)g(has)h(a)h +(reference)d(to)j(its)g(node.)e(The)h(methods)f Fq(extension)g +Fv(and)h Fq(node)g Fv(follo)n(w)396 2501 y(these)h(references;)e(a)h +(typical)g(phrase)f(is)396 2681 y Fq(self)44 b(#)h(node)f(#)h +(attribute)e("xy")396 2872 y Fv(to)21 b(get)f(the)g(v)n(alue)g(of)f(an) +i(attrib)n(ute)e(from)h(a)g(method)f(de\002ned)g(in)h(the)h(e)o +(xtension)d(object;)i(or)396 3053 y Fq(self)44 
b(#)h(node)f(#)h(iter) +486 3150 y(\(fun)f(n)h(-)p Fo(>)f Fq(n)h(#)f(extension)g(#)g(my_method) +g(...\))396 3341 y Fv(to)21 b(iterate)f(o)o(v)o(er)f(the)h(subnodes)f +(and)g(to)i(call)f Fq(my_method)f Fv(of)h(the)h(corresponding)16 +b(e)o(xtension)j(objects.)396 3490 y(Note)h(that)h(e)o(xtension)d +(objects)i(do)g(not)g(ha)n(v)o(e)g(references)e(to)j(subnodes)e(\(or)g +("sube)o(xtensions"\))f(themselv)o(es;)h(in)i(order)396 +3598 y(to)g(get)f(one)f(of)h(the)h(children)d(of)i(an)g(e)o(xtension)f +(you)g(must)i(\002rst)g(go)e(to)i(the)f(node)f(object,)h(then)f(get)h +(the)h(child)e(node,)396 3706 y(and)h(\002nally)g(reach)f(the)i(e)o +(xtension)d(that)j(is)g(logically)e(the)h(child)g(of)g(the)g(e)o +(xtension)f(you)g(started)h(with.)-2 4034 y Fp(3.3.1.)35 +b(Ho)n(w)f(to)f(de\002ne)h(an)g(e)n(xtension)i(c)n(lass)396 +4202 y Fv(At)21 b(minimum,)e(you)g(must)h(de\002ne)g(the)g(methods)f +Fq(clone)p Fv(,)h Fq(node)p Fv(,)g(and)f Fq(set_node)h +Fv(such)f(that)i(your)e(class)i(is)396 4310 y(compatible)e(with)h(the)h +(type)e Fq(extension)p Fv(.)g(The)h(method)f Fq(set_node)g +Fv(is)i(called)f(during)f(the)h(initialization)g(of)g(the)396 +4418 y(node,)f(or)h(after)g(a)h(node)e(has)h(been)g(cloned;)f(the)h +(node)f(object)h(in)m(v)n(ok)o(es)f Fq(set_node)g Fv(on)h(the)g(e)o +(xtension)f(object)h(to)g(tell)396 4526 y(it)h(that)f(this)h(node)e(is) +i(no)n(w)f(the)g(object)g(the)g(e)o(xtension)f(is)i(link)o(ed)f(to.)g +(The)f(e)o(xtension)g(must)h(return)f(the)i(node)e(object)396 +4633 y(passed)h(as)h(ar)o(gument)d(of)i Fq(set_node)f +Fv(when)h(the)g Fq(node)g Fv(method)f(is)i(called.)p +Black 3798 5278 a Fr(66)p Black eop +%%Page: 67 67 +67 66 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fv(The)g +Fq(clone)g Fv(method)f(must)h(return)f(a)i(cop)o(y)e(of)h(the)g(e)o +(xtension)f(object;)h(at)g(least)h(the)f(object)g(itself)h(must)f(be) +396 687 y(duplicated,)f(b)n(ut)h(if)g(required,)e(the)j(cop)o(y)e 
+(should)g(deeply)g(duplicate)g(all)i(objects)f(and)g(v)n(alues)g(that)g +(are)g(referred)e(by)396 795 y(the)i(e)o(xtension,)f(too.)h(Whether)f +(this)i(is)g(required,)d(depends)h(on)h(the)g(application;)f +Fq(clone)h Fv(is)h(in)m(v)n(ok)o(ed)d(by)i(the)g(node)396 +903 y(object)g(when)g(one)f(of)h(its)h(cloning)e(methods)g(is)i +(called.)396 1052 y(A)g(good)e(starting)h(point)f(for)h(an)g(e)o +(xtension)e(class:)396 1232 y Fq(class)44 b(custom_extension)e(=)486 +1329 y(object)i(\(self\))576 1524 y(val)g(mutable)g(node)g(=)g(\(None)g +(:)h(custom_extension)d(node)i(option\))576 1718 y(method)f(clone)h(=)h +({<)g(>})576 1912 y(method)e(node)i(=)665 2009 y(match)f(node)g(with) +845 2107 y(None)g(->)934 2204 y(assert)g(false)755 2301 +y(|)h(Some)f(n)g(->)h(n)576 2495 y(method)e(set_node)h(n)h(=)665 +2592 y(node)f(<-)h(Some)f(n)486 2786 y(end)396 2977 y +Fv(This)21 b(class)g(is)g(compatible)e(with)h Fq(extension)p +Fv(.)f(The)h(purpose)e(of)i(de\002ning)f(such)h(a)h(class)g(is,)g(of)f +(course,)f(adding)396 3085 y(further)g(methods;)g(and)h(you)f(can)h(do) +g(it)h(without)e(restriction.)396 3235 y(Often,)h(you)f(w)o(ant)h(not)g +(only)g(one)f(e)o(xtension)g(class.)i(In)f(this)h(case,)f(it)h(is)g +(the)f(simplest)h(w)o(ay)f(that)g(all)h(your)e(classes)i(\(for)396 +3343 y(one)f(kind)f(of)h(document\))e(ha)n(v)o(e)i(the)g(same)g(type)g +(\(with)g(respect)g(to)g(the)g(interf)o(ace;)g(i.e.)g(it)h(does)f(not)g +(matter)g(if)g(your)396 3451 y(classes)i(dif)n(fer)d(in)h(the)g +(de\002ned)f(pri)n(v)n(ate)h(methods)f(and)g(instance)h(v)n(ariables,)f +(b)n(ut)h(public)g(methods)f(count\).)f(This)396 3559 +y(approach)g(a)n(v)n(oids)i(lots)h(of)f(coercions)f(and)h(problems)e +(with)j(type)f(incompatibilities.)e(It)j(is)g(simple)f(to)g(implement:) +396 3739 y Fq(class)44 b(custom_extension)e(=)486 3836 +y(object)i(\(self\))576 3933 y(val)g(mutable)g(node)g(=)g(\(None)g(:)h +(custom_extension)d(node)i(option\))576 4127 y(method)f(clone)h(=)h +(...)269 b(\(*)44 b(see)g(above)g(*\))576 
4224 y(method)f(node)i(=)f +(...)314 b(\(*)44 b(see)g(above)g(*\))576 4322 y(method)f(set_node)h(n) +h(=)f(...)h(\(*)f(see)g(above)g(*\))576 4516 y(method)f(virtual)h +(my_method1)f(:)i(...)576 4613 y(method)e(virtual)h(my_method2)f(:)i +(...)576 4710 y(...)f(\(*)g(etc.)h(*\))486 4807 y(end)p +Black 3797 5278 a Fr(67)p Black eop +%%Page: 68 68 +68 67 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 676 a Fq(class)44 +b(custom_extension_kind_A)d(=)486 773 y(object)j(\(self\))576 +870 y(inherit)f(custom_extension)576 1065 y(method)g(my_method1)h(=)g +(...)576 1162 y(method)f(my_method2)h(=)g(...)486 1259 +y(end)396 1453 y(class)g(custom_extension_kind_B)d(=)486 +1550 y(object)j(\(self\))576 1647 y(inherit)f(custom_extension)576 +1842 y(method)g(my_method1)h(=)g(...)576 1939 y(method)f(my_method2)h +(=)g(...)486 2036 y(end)396 2227 y Fv(If)20 b(a)h(class)g(does)f(not)g +(need)f(a)i(method)e(\(e.g.)g(because)h(it)h(does)e(not)h(mak)o(e)g +(sense,)g(or)g(it)h(w)o(ould)f(violate)f(some)396 2335 +y(important)g(condition\),)f(it)j(is)g(possible)f(to)g(de\002ne)g(the)g +(method)f(and)g(to)i(al)o(w)o(ays)f(raise)h(an)f(e)o(xception)e(when)i +(the)396 2443 y(method)f(is)i(in)m(v)n(ok)o(ed)e(\(e.g.)g +Fq(assert)44 b(false)p Fv(\).)396 2592 y(The)20 b(latter)g(is)i(a)e +(strong)g(recommendation:)c(do)k(not)g(try)g(to)g(further)f(specialize) +h(the)g(types)g(of)g(e)o(xtension)f(objects.)h(It)g(is)396 +2700 y(dif)n(\002cult,)g(sometimes)g(e)n(v)o(en)f(impossible,)g(and)h +(almost)g(ne)n(v)o(er)f(w)o(orth-while.)-2 3070 y Fp(3.3.2.)35 +b(Ho)n(w)f(to)f(bind)h(e)n(xtension)h(c)n(lasses)h(to)d(element)i +(types)396 3237 y Fv(Once)20 b(you)f(ha)n(v)o(e)h(de\002ned)f(your)g(e) +o(xtension)g(classes,)i(you)e(can)h(bind)g(them)f(to)i(element)e +(types.)h(The)g(simplest)h(case)f(is)396 3345 y(that)h(you)e(ha)n(v)o +(e)g(only)h(one)f(class)j(and)d(that)i(this)f(class)h(is)h(to)e(be)g 
+(al)o(w)o(ays)h(used.)e(The)h(parsing)f(functions)g(in)h(the)h(module) +396 3453 y Fq(Pxp_yacc)f Fv(tak)o(e)g(a)h Fq(spec)f Fv(ar)o(gument)d +(which)j(can)g(be)g(customized.)f(If)h(your)f(single)h(class)h(has)g +(the)f(name)f Fq(c)p Fv(,)i(this)396 3561 y(ar)o(gument)d(should)h(be) +396 3741 y Fq(let)45 b(spec)f(=)486 3839 y(make_spec_from_alist)576 +3936 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576 +4033 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576 +4130 y(~element_alist:)535 b([])576 4227 y(\(\))396 4418 +y Fv(This)21 b(means)f(that)g(data)g(nodes)f(will)i(be)f(created)g +(from)f(the)h(e)o(x)o(emplar)e(passed)i(by)g(~data_e)o(x)o(emplar)d +(and)j(that)g(all)396 4526 y(element)g(nodes)f(will)i(be)f(made)g(from) +f(the)h(e)o(x)o(emplar)e(speci\002ed)i(by)g(~def)o(ault_element_e)o(x)o +(emplar)-5 b(.)15 b(In)396 4634 y(~element_alist,)k(you)h(can)g(pass)g +(that)h(dif)n(ferent)d(e)o(x)o(emplars)h(are)h(to)g(be)g(used)g(for)g +(dif)n(ferent)e(element)i(types;)g(b)n(ut)g(this)396 +4742 y(is)h(an)g(optional)d(feature.)h(If)h(you)g(do)g(not)f(need)h +(it,)h(pass)f(the)g(empty)g(list.)p Black 3800 5278 a +Fr(68)p Black eop +%%Page: 69 69 +69 68 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fv(Remember)f(that)i(an)f +(e)o(x)o(emplar)e(is)j(a)g(\(node,)d(e)o(xtension\))h(pair)g(that)i +(serv)o(es)f(as)h(pattern)e(when)h(ne)n(w)g(nodes)f(\(and)g(the)396 +687 y(corresponding)e(e)o(xtension)i(objects\))g(are)h(added)f(to)i +(the)f(document)e(tree.)i(In)g(this)h(case,)f(the)g(e)o(x)o(emplar)f +(contains)g Fq(c)i Fv(as)396 795 y(e)o(xtension,)e(and)g(when)h(nodes)f +(are)i(created,)e(the)h(e)o(x)o(emplar)e(is)j(cloned,)e(and)h(cloning)f +(mak)o(es)h(also)g(a)h(cop)o(y)e(of)h Fq(c)h Fv(such)396 +903 y(that)g(all)f(nodes)g(of)g(the)g(document)e(tree)i(will)h(ha)n(v)o +(e)f(a)g(cop)o(y)g(of)g Fq(c)g Fv(as)h(e)o(xtension.)396 +1052 y(The)f Fq(~element_alist)f Fv(ar)o(gument)e(can)j(bind)g 
+(speci\002c)g(element)g(types)g(to)g(speci\002c)g(e)o(x)o(emplars;)f +(as)i(e)o(x)o(emplars)396 1160 y(may)f(be)g(instances)g(of)g(dif)n +(ferent)f(classes)i(it)g(is)g(ef)n(fecti)n(v)o(ely)d(possible)i(to)h +(bind)e(element)h(types)g(to)g(classes.)h(F)o(or)396 +1268 y(e)o(xample,)e(if)h(the)g(element)g(type)g("p")g(is)h +(implemented)d(by)i(class)h("c_p",)e(and)h("q")g(is)h(realized)f(by)f +("c_q",)h(you)f(can)396 1376 y(pass)i(the)f(follo)n(wing)f(v)n(alue:) +396 1556 y Fq(let)45 b(spec)f(=)486 1653 y(make_spec_from_alist)576 +1750 y(~data_exemplar:)535 b(\(new)44 b(data_impl)g(c\))576 +1847 y(~default_element_exemplar:)c(\(new)k(element_impl)f(c\))576 +1945 y(~element_alist:)665 2042 y([)i("p",)f(new)g(element_impl)f(c_p;) +755 2139 y("q",)h(new)g(element_impl)f(c_q;)665 2236 +y(])576 2333 y(\(\))396 2524 y Fv(The)20 b(e)o(xtension)f(object)h +Fq(c)g Fv(is)h(still)h(used)e(for)f(all)i(data)f(nodes)f(and)h(for)g +(all)g(other)g(element)f(types.)-2 3026 y Fx(3.4.)39 +b(Details)f(of)i(the)f(mapping)e(fr)m(om)i(XML)g(te)n(xt)g(to)g(the)g +(tree)-2 3212 y(representation)-2 3540 y Fp(3.4.1.)c(The)f +(representation)h(of)e(c)o(haracter)n(-free)h(elements)396 +3708 y Fv(If)20 b(an)g(element)g(declaration)f(does)h(not)f(allo)n(w)i +(the)f(element)f(to)i(contain)e(character)g(data,)h(the)g(follo)n(wing) +e(rules)j(apply)-5 b(.)396 3858 y(If)20 b(the)h(element)e(must)h(be)g +(empty)-5 b(,)19 b(i.e.)h(it)h(is)g(declared)e(with)i(the)f(k)o(e)o(yw) +o(ord)e Fq(EMPTY)p Fv(,)i(the)g(element)g(instance)g(must)g(be)396 +3965 y(ef)n(fecti)n(v)o(ely)f(empty)g(\(it)h(must)h(not)f(e)n(v)o(en)f +(contain)g(whitespace)h(characters\).)e(The)i(parser)g(guarantees)e +(that)j(a)f(declared)396 4073 y Fq(EMPTY)g Fv(element)g(does)g(ne)n(v)o +(er)f(contain)g(a)h(data)g(node,)f(e)n(v)o(en)g(if)i(the)f(data)g(node) +f(represents)h(the)g(empty)f(string.)396 4223 y(If)h(the)h(element)e +(declaration)g(only)g(permits)h(other)f(elements)h(to)h(occur)e(within) 
+h(that)g(element)g(b)n(ut)g(not)g(character)396 4331 +y(data,)g(it)h(is)g(still)g(possible)f(to)h(insert)f(whitespace)g +(characters)f(between)g(the)h(subelements.)f(The)h(parser)g(ignores)f +(these)396 4439 y(characters,)g(too,)h(and)g(does)f(not)h(create)g +(data)g(nodes)g(for)f(them.)396 4588 y Fu(Example.)h +Fv(Consider)g(the)g(follo)n(wing)f(element)g(types:)396 +4768 y Fq()396 +4865 y()p Black 3800 +5278 a Fr(69)p Black eop +%%Page: 70 70 +70 69 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fq()396 770 y Fv(Only)20 b Fq(x)h Fv(may)e(contain)h +(character)e(data,)i(the)h(k)o(e)o(yw)o(ord)d Fq(#PCDATA)h +Fv(indicates)h(this.)h(The)f(other)f(types)h(are)396 +878 y(character)n(-free.)396 1027 y(The)g(XML)g(term)396 +1207 y Fq()44 b()396 1398 y Fv(will)21 +b(be)f(internally)f(represented)g(by)g(an)i(element)e(node)g(for)h +Fq(x)g Fv(with)h(three)f(subnodes:)e(the)j(\002rst)g +Fq(z)f Fv(element,)g(a)g(data)396 1506 y(node)f(containing)g(the)h +(space)g(character)m(,)e(and)i(the)g(second)g Fq(z)g +Fv(element.)g(In)f(contrast)h(to)g(this,)h(the)f(term)396 +1686 y Fq()44 b()396 1877 y Fv(is)21 b(represented)e(by) +h(an)g(element)f(node)g(for)h Fq(y)h Fv(with)f(only)f +Fr(two)i Fv(subnodes,)e(the)h(tw)o(o)g Fq(z)h Fv(elements.)e(There)h +(is)h(no)f(data)396 1985 y(node)f(for)h(the)g(space)g(character)f +(because)h(spaces)g(are)g(ignored)f(in)h(the)g(character)n(-free)e +(element)i Fq(y)p Fv(.)-2 2355 y Fp(3.4.2.)35 b(The)f(representation)h +(of)e(c)o(haracter)h(data)396 2523 y Fv(The)20 b(XML)g(speci\002cation) +g(allo)n(ws)g(all)h(Unicode)e(characters)g(in)i(XML)f(te)o(xts.)g(This) +g(parser)g(can)g(be)g(con\002gured)e(such)396 2631 y(that)j(UTF-8)e(is) +i(used)f(to)h(represent)e(the)h(characters)f(internally;)g(ho)n(we)n(v) +o(er)m(,)f(the)i(def)o(ault)g(character)e(encoding)h(is)396 +2738 y(ISO-8859-1.)e(\(Currently)-5 b(,)18 b(no)i(other)f(encodings)g 
+(are)h(possible)g(for)f(the)i(internal)e(string)h(representation;)e +(the)i(type)396 2846 y Fq(Pxp_types.rep_encoding)d Fv(enumerates)i(the) +h(possible)g(encodings.)e(Principially)-5 b(,)19 b(the)h(parser)g +(could)f(use)h(an)o(y)396 2954 y(encoding)e(that)j(is)g +(ASCII-compatible,)d(b)n(ut)i(there)g(are)g(currently)e(only)i(le)o +(xical)f(analyzers)h(for)f(UTF-8)h(and)396 3062 y(ISO-8859-1.)d(It)k +(is)g(currently)d(impossible)i(to)g(use)h(UTF-16)e(or)h(UCS-4)g(as)h +(internal)f(encodings)e(\(or)i(other)f(multibyte)396 +3170 y(encodings)g(which)g(are)h(not)g(ASCII-compatible\))e(unless)i +(major)g(parts)g(of)g(the)g(parser)g(are)g(re)n(written)f(-)i(unlik)o +(ely)-5 b(...\))396 3320 y(The)20 b(internal)g(encoding)e(may)h(be)h +(dif)n(ferent)f(from)g(the)h(e)o(xternal)f(encoding)f(\(speci\002ed)i +(in)g(the)g(XML)h(declaration)396 3428 y Fo(<)p Fq(?xml)44 +b(...)g(encoding="..."?)p Fo(>)p Fv(\);)18 b(in)j(this)f(case)h(the)f +(strings)g(are)g(automatically)f(con)m(v)o(erted)f(to)i(the)g(internal) +396 3535 y(encoding.)396 3685 y(If)g(the)h(internal)e(encoding)f(is)j +(ISO-8859-1,)c(it)k(is)g(possible)f(that)g(there)g(are)g(characters)g +(that)g(cannot)f(be)h(represented.)396 3793 y(In)g(this)h(case,)f(the)g +(parser)g(ignores)f(such)h(characters)f(and)h(prints)g(a)h(w)o(arning)e +(\(to)h(the)g Fq(collect_warning)e Fv(object)396 3901 +y(that)j(must)f(be)g(passed)g(when)g(the)g(parser)f(is)i(called\).)396 +4050 y(The)f(XML)g(speci\002cation)g(allo)n(ws)g(lines)h(to)f(be)g +(separated)g(by)f(single)h(LF)h(characters,)e(by)h(CR)h(LF)g(character) +396 4158 y(sequences,)e(or)h(by)g(single)g(CR)i(characters.)d +(Internally)-5 b(,)18 b(these)i(separators)f(are)h(al)o(w)o(ays)h(con)m +(v)o(erted)d(to)i(single)g(LF)396 4266 y(characters.)396 +4416 y(The)g(parser)g(guarantees)e(that)j(there)e(are)i(ne)n(v)o(er)d +(tw)o(o)j(adjacent)e(data)h(nodes;)g(if)g(necessary)-5 +b(,)19 b(data)h(material)g(that)g(w)o(ould)396 4523 y(otherwise)g(be)g 
+(represented)e(by)i(se)n(v)o(eral)g(nodes)f(is)i(collapsed)f(into)f +(one)h(node.)f(Note)h(that)g(you)g(can)g(still)h(create)f(node)396 +4631 y(trees)h(with)f(adjacent)g(data)g(nodes;)f(ho)n(we)n(v)o(er)m(,)f +(the)i(parser)g(does)f(not)h(return)f(such)h(trees.)p +Black 3800 5278 a Fr(70)p Black eop +%%Page: 71 71 +71 70 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black 396 579 a Fv(Note)g(that)h(CD)m(A) +-9 b(T)h(A)20 b(sections)g(are)g(not)g(represented)f(specially;)h(such) +g(sections)g(are)g(added)f(to)h(the)h(current)d(data)396 +687 y(material)i(that)g(being)g(collected)f(for)h(the)g(ne)o(xt)f(data) +h(node.)-2 1056 y Fp(3.4.3.)35 b(The)f(representation)h(of)e(entities)h +(within)g(documents)396 1224 y Fr(Entities)21 b(ar)m(e)f(not)g(r)m(epr) +m(esented)f(within)i(documents!)d Fv(If)i(the)h(parser)e(\002nds)h(an)h +(entity)e(reference)g(in)h(the)g(document)396 1332 y(content,)f(the)h +(reference)f(is)i(immediately)e(e)o(xpanded,)e(and)j(the)g(parser)g +(reads)g(the)g(e)o(xpansion)e(te)o(xt)i(instead)g(of)g(the)396 +1440 y(reference.)-2 1810 y Fp(3.4.4.)35 b(The)f(representation)h(of)e +(attrib)n(utes)396 1977 y Fv(As)21 b(attrib)n(ute)f(v)n(alues)g(are)g +(composed)e(of)i(Unicode)f(characters,)g(too,)h(the)g(same)h(problems)d +(with)j(the)f(character)396 2085 y(encoding)e(arise)j(as)g(for)e +(character)g(material.)h(Attrib)n(ute)g(v)n(alues)g(are)g(con)m(v)o +(erted)d(to)k(the)f(internal)f(encoding,)f(too;)i(and)396 +2193 y(if)h(there)e(are)i(characters)e(that)h(cannot)f(be)h +(represented,)e(these)j(are)f(dropped,)e(and)h(a)i(w)o(arning)e(is)i +(printed.)396 2343 y(Attrib)n(ute)f(v)n(alues)g(are)g(normalized)e +(before)h(the)o(y)h(are)g(returned)e(by)i(methods)f(lik)o(e)h +Fq(attribute)p Fv(.)f(First,)i(an)o(y)396 2451 y(remaining)e(entity)h +(references)e(are)i(e)o(xpanded;)e(if)j(necessary)-5 +b(,)19 b(e)o(xpansion)f(is)j(performed)c(recursi)n(v)o(ely)-5 +b(.)18 b(Second,)396 2558 
y(ne)n(wline)i(characters)f(\(an)o(y)g(of)h +(LF)-7 b(,)21 b(CR)g(LF)-7 b(,)21 b(or)f(CR)h(characters\))e(are)h(con) +m(v)o(erted)e(to)i(single)g(space)h(characters.)e(Note)396 +2666 y(that)i(especially)e(the)i(latter)f(action)g(is)h(prescribed)d +(by)i(the)g(XML)g(standard)f(\(b)n(ut)41 b(is)21 b(not)f(con)m(v)o +(erted)e(such)i(that)g(it)h(is)396 2774 y(still)h(possible)e(to)g +(include)f(line)h(feeds)g(into)g(attrib)n(utes\).)-2 +3144 y Fp(3.4.5.)35 b(The)f(representation)h(of)e(pr)n(ocessing)h +(instructions)396 3312 y Fv(Processing)20 b(instructions)f(are)h +(parsed)g(to)g(some)g(e)o(xtent:)f(The)h(\002rst)h(w)o(ord)f(of)g(the)g +(PI)g(is)i(called)e(the)g(tar)o(get,)f(and)g(it)i(is)396 +3420 y(stored)f(separated)f(from)g(the)i(rest)f(of)g(the)g(PI:)396 +3600 y Fq()396 3791 y Fv(The)20 b(e)o(xact)g +(location)f(where)h(a)g(PI)h(occurs)e(is)i(not)f(represented)f(\(by)g +(def)o(ault\).)g(The)h(parser)f(puts)i(the)f(PI)g(into)g(the)396 +3899 y(object)g(that)g(represents)g(the)g(embracing)e(construct)h(\(an) +h(element,)f(a)i(DTD,)f(or)g(the)g(whole)g(document\);)e(that)i(means) +396 4007 y(you)g(can)g(\002nd)f(out)h(which)g(PIs)h(occur)e(in)h(a)h +(certain)f(element,)f(in)h(the)h(DTD,)f(or)g(in)g(the)g(whole)g +(document,)e(b)n(ut)i(you)396 4114 y(cannot)f(lookup)g(the)h(e)o(xact)g +(position)f(within)h(the)g(construct.)396 4264 y(If)g(you)g(require)e +(the)j(e)o(xact)e(location)h(of)g(PIs,)g(it)h(is)g(possible)f(to)g +(create)g(e)o(xtra)g(nodes)f(for)h(them.)f(This)i(mode)e(is)396 +4372 y(controled)g(by)g(the)i(option)e Fq(enable_pinstr_nodes)p +Fv(.)e(The)j(additional)f(nodes)g(ha)n(v)o(e)h(the)g(node)f(type)h +Fq(T_pinstr)396 4480 y Fn(target)p Fv(,)g(and)f(are)i(created)e(from)g +(special)h(e)o(x)o(emplars)f(contained)f(in)j(the)f Fq(spec)g +Fv(\(see)g(pxp_document.mli\).)p Black 3800 5278 a Fr(71)p +Black eop +%%Page: 72 72 +72 71 bop Black 2225 67 a Fr(Chapter)20 b(3.)g(The)g(objects)g(r)m(epr) +m(esenting)g(the)g(document)p Black -2 583 a Fp(3.4.6.)35 
+b(The)f(representation)h(of)e(comments)396 751 y Fv(Normally)-5 +b(,)19 b(comments)g(are)h(not)g(represented;)e(the)o(y)i(are)g(dropped) +e(by)h(def)o(ault.)h(Ho)n(we)n(v)o(er)m(,)e(if)i(you)f(require)g(them,) +h(it)h(is)396 859 y(possible)f(to)h(create)e Fq(T_comment)h +Fv(nodes)f(for)h(them.)f(This)i(mode)e(can)h(be)g(speci\002ed)g(by)g +(the)g(option)396 967 y Fq(enable_comment_nodes)p Fv(.)d(Comment)j +(nodes)f(are)h(created)g(from)f(special)h(e)o(x)o(emplars)f(contained)f +(in)j(the)f Fq(spec)396 1075 y Fv(\(see)h(pxp_document.mli\).)15 +b(Y)-9 b(ou)19 b(can)h(access)h(the)f(contents)g(of)g(comments)f +(through)f(the)i(method)f Fq(comment)p Fv(.)-2 1444 y +Fp(3.4.7.)35 b(The)f(attrib)n(utes)f Fc(xml:lang)d Fp(and)k +Fc(xml:space)396 1612 y Fv(These)20 b(attrib)n(utes)g(are)g(not)g +(supported)f(specially;)h(the)o(y)f(are)h(handled)f(lik)o(e)h(an)o(y)g +(other)f(attrib)n(ute.)-2 1982 y Fp(3.4.8.)35 b(And)f(what)f(about)h +(namespaces?)396 2149 y Fv(Currently)-5 b(,)19 b(there)g(is)i(no)f +(special)h(support)d(for)i(namespaces.)f(Ho)n(we)n(v)o(er)m(,)f(the)i +(parser)g(allo)n(ws)g(it)h(that)f(the)h(colon)e(occurs)396 +2257 y(in)i(names)e(such)h(that)h(it)g(is)g(possible)f(to)g(implement)f +(namespaces)g(on)h(top)g(of)g(the)g(current)f(API.)396 +2407 y(Some)h(future)f(release)h(of)g(PXP)h(will)g(support)e +(namespaces)g(as)i(b)n(uilt-in)f(feature...)p Black 3800 +5278 a Fr(72)p Black eop +%%Page: 73 73 +73 72 bop Black Black -2 621 a Fs(Chapter)48 b(4.)f(Con\002guring)j +(and)e(calling)f(the)h(par)m(ser)-2 1055 y Fx(4.1.)39 +b(Over)q(vie)n(w)396 1235 y Fv(There)20 b(are)g(the)g(follo)n(wing)f +(main)g(functions)g(in)m(v)n(oking)f(the)i(parser)g(\(in)g(Pxp_yacc\):) +p Black 396 1558 a Ft(\225)p Black 60 w Fr(par)o(se_document_entity:)d +Fv(Y)-9 b(ou)19 b(w)o(ant)i(to)f(parse)g(a)g(complete)g(and)f(closed)h +(document)e(consisting)i(of)g(a)g(DTD)h(and)479 1666 +y(the)f(document)f(body;)g(the)h(body)f(is)i(v)n(alidated)e(against)g 
+(the)h(DTD.)h(This)f(mode)f(is)i(interesting)f(if)g(you)f(ha)n(v)o(e)h +(a)h(\002le)479 1835 y Fq()f +()g(...)h()396 1984 y Fv(and)20 b(you)f(can)h(accept)g(an) +o(y)f(DTD)i(that)f(is)h(included)e(in)h(the)g(\002le)h(\(e.g.)f +(because)f(the)h(\002le)h(is)g(under)e(your)g(control\).)p +Black 396 2092 a Ft(\225)p Black 60 w Fr(par)o(se_wfdocument_entity:)e +Fv(Y)-9 b(ou)20 b(w)o(ant)g(to)g(parse)g(a)h(complete)e(and)h(closed)f +(document)g(consisting)g(of)h(a)h(DTD)479 2200 y(and)f(the)g(document)e +(body;)h(b)n(ut)h(the)h(body)d(is)k(not)d(v)n(alidated,)g(only)h(check) +o(ed)e(for)i(well-formedness.)e(This)i(mode)f(is)479 +2308 y(preferred)f(if)j(v)n(alidation)d(costs)j(too)f(much)f(time)i(or) +f(if)g(the)g(DTD)h(is)g(missing.)p Black 396 2416 a Ft(\225)p +Black 60 w Fr(par)o(se_dtd_entity:)d Fv(Y)-9 b(ou)20 +b(w)o(ant)g(only)f(to)i(parse)e(an)i(entity)e(\(\002le\))i(containing)d +(the)i(e)o(xternal)f(subset)h(of)g(a)h(DTD.)479 2524 +y(Sometimes)f(it)h(is)g(interesting)e(to)i(read)e(such)h(a)h(DTD,)f +(for)g(e)o(xample)e(to)j(compare)d(it)j(with)g(the)f(DTD)g(included)f +(in)h(a)479 2632 y(document,)e(or)i(to)g(apply)g(the)g(ne)o(xt)f(mode:) +p Black 396 2740 a Ft(\225)p Black 60 w Fr(par)o(se_content_entity:)e +Fv(Y)-9 b(ou)20 b(w)o(ant)g(only)g(to)g(parse)g(an)g(entity)g +(\(\002le\))g(containing)e(a)j(fragment)d(of)i(a)h(document)479 +2848 y(body;)e(this)i(fragment)d(is)j(v)n(alidated)f(against)f(the)h +(DTD)h(you)e(pass)i(to)f(the)g(function.)e(Especially)-5 +b(,)19 b(the)i(fragment)479 2956 y(must)g(not)e(ha)n(v)o(e)h(a)65 +b Fo(<)p Fq(!DOCTYPE)p Fo(>)19 b Fv(clause,)h(and)g(must)g(directly)g +(be)o(gin)f(with)h(an)g(element.)f(The)h(element)g(is)479 +3064 y(v)n(alidated)f(against)h(the)g(DTD.)g(This)h(mode)e(is)i +(interesting)e(if)i(you)e(w)o(ant)h(to)h(check)e(documents)f(against)i +(a)h(\002x)o(ed,)479 3172 y(immutable)e(DTD.)p Black +396 3280 a Ft(\225)p Black 60 w Fr(par)o(se_wfcontent_entity:)f 
+Fv(This)i(function)f(also)h(parses)g(a)h(single)f(element)g(without)f +(DTD,)h(b)n(ut)g(does)g(not)g(v)n(alidate)479 3388 y(it.)p +Black 396 3495 a Ft(\225)p Black 60 w Fr(e)n(xtr)o(act_dtd_fr)l +(om_document_entity:)15 b Fv(This)20 b(function)f(e)o(xtracts)g(the)i +(DTD)f(from)f(a)i(closed)f(document)479 3603 y(consisting)g(of)g(a)g +(DTD)h(and)e(a)i(document)d(body)-5 b(.)18 b(Both)j(the)f(internal)f +(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted.)396 +3794 y(In)f(man)o(y)f(cases,)i Fq(parse_document_entity)c +Fv(is)k(the)f(preferred)e(mode)i(to)g(parse)g(a)g(document)f(in)h(a)h +(v)n(alidating)396 3902 y(w)o(ay)-5 b(,)20 b(and)g Fq +(parse_wfdocument_entity)c Fv(is)22 b(the)e(mode)f(of)h(choice)f(to)i +(parse)f(a)g(\002le)h(while)f(only)g(checking)e(for)396 +4010 y(well-formedness.)396 4160 y(There)i(are)g(a)g(number)f(of)h(v)n +(ariations)f(of)h(these)g(modes.)f(One)h(important)f(application)g(of)h +(a)g(parser)g(is)h(to)f(check)396 4268 y(documents)f(of)h(an)g +(untrusted)f(source)g(against)h(a)g(\002x)o(ed)g(DTD.)g(One)g(solution) +f(is)i(to)g(not)f(allo)n(w)g(the)g Fo(<)p Fq(!DOCTYPE)p +Fo(>)396 4375 y Fv(clause)g(in)h(these)f(documents,)e(and)i(treat)g +(the)h(document)d(lik)o(e)i(a)h(fragment)d(\(using)i(mode)f +Fr(par)o(se_content_entity)p Fv(\).)396 4483 y(This)i(is)g(v)o(ery)e +(simple,)h(b)n(ut)g(in\003e)o(xible;)f(users)i(of)e(such)h(a)h(system)f +(cannot)f(e)n(v)o(en)h(de\002ne)f(additional)g(entities)i(to)396 +4591 y(abbre)n(viate)e(frequent)f(phrases)i(of)g(their)g(te)o(xt.)396 +4741 y(It)h(may)e(be)i(necessary)e(to)h(ha)n(v)o(e)g(a)h(more)e +(intelligent)g(check)o(er)-5 b(.)20 b(F)o(or)g(e)o(xample,)e(it)j(is)g +(also)g(possible)e(to)i(parse)f(the)396 4849 y(document)e(to)j(check)e +(fully)-5 b(,)19 b(i.e.)h(with)h(DTD,)f(and)f(to)i(compare)d(this)j +(DTD)f(with)h(the)f(prescribed)f(one.)g(In)h(order)f(to)p +Black 3800 5278 a Fr(73)p Black eop +%%Page: 74 74 +74 73 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i 
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(fully)g(parse)g(the)g +(document,)e(mode)h Fr(par)o(se_document_entity)e Fv(is)k(applied,)e +(and)h(to)g(get)g(the)g(DTD)h(to)f(compare)f(with)396 +687 y(mode)g Fr(par)o(se_dtd_entity)f Fv(can)i(be)h(used.)396 +836 y(There)f(is)h(another)d(v)o(ery)i(important)e(con\002gurable)g +(aspect)i(of)g(the)g(parser:)g(the)g(so-called)g(resolv)o(er)-5 +b(.)19 b(The)h(task)g(of)g(the)396 944 y(resolv)o(er)f(is)i(to)g +(locate)f(the)g(contents)f(of)h(an)g(\(e)o(xternal\))f(entity)g(for)h +(a)h(gi)n(v)o(en)e(entity)g(name,)h(and)f(to)i(mak)o(e)e(the)i +(contents)396 1052 y(accessible)g(as)f(a)h(character)e(stream.)h +(\(Furthermore,)d(it)k(also)f(normalizes)g(the)g(character)f(set;)i(b)n +(ut)f(this)h(is)g(a)f(detail)h(we)396 1160 y(can)f(ignore)f(here.\))g +(Consider)h(you)f(ha)n(v)o(e)h(a)g(\002le)h(called)f +Fq("main.xml")f Fv(containing)396 1340 y Fq()396 1437 y(\045sub;)396 +1628 y Fv(and)20 b(a)h(\002le)f(stored)g(in)g(the)h(subdirectory)c +Fq("sub")j Fv(with)h(name)e Fq("sub.xml")g Fv(containing)396 +1808 y Fq() +396 1906 y(\045subsub;)396 2097 y Fv(and)20 b(a)g(\002le)h(stored)e(in) +h(the)g(subdirectory)d Fq("subsub")j Fv(of)f Fq("sub")h +Fv(with)g(name)f Fq("subsub.xml")g Fv(\(the)g(contents)h(of)f(this)396 +2204 y(\002le)i(do)f(not)g(matter\).)f(Here,)h(the)g(resolv)o(er)f +(must)h(track)g(that)g(the)g(second)g(entity)g Fq(subsub)f +Fv(is)i(located)f(in)g(the)h(directory)396 2312 y Fq("sub/subsub")p +Fv(,)e(i.e.)h(the)g(dif)n(\002culty)f(is)i(to)g(interpret)e(the)h +(system)g(\(\002le\))h(names)e(of)h(entities)h(relati)n(v)o(e)e(to)i +(the)f(entities)396 2420 y(containing)f(them,)g(e)n(v)o(en)g(if)i(the)f +(entities)h(are)f(deeply)f(nested.)396 2570 y(There)h(is)h(not)f(a)g +(\002x)o(ed)g(resolv)o(er)f(already)g(doing)g(e)n(v)o(erything)e(right) +j(-)g(resolving)f(entity)h(names)g(is)h(a)f(task)h(that)f(highly)396 +2678 y(depends)f(on)h(the)g(en)m(vironment.)d(The)j(XML)g +(speci\002cation)f(only)h(demands)f(that)h 
Fq(SYSTEM)g +Fv(entities)g(are)g(interpreted)396 2786 y(lik)o(e)h(URLs)g(\(which)e +(is)i(not)f(v)o(ery)f(precise,)h(as)h(there)e(are)i(lots)f(of)g(URL)h +(schemes)f(in)g(use\),)g(hoping)f(that)h(this)h(helps)396 +2894 y(o)o(v)o(ercoming)c(the)j(local)g(peculiarities)g(of)g(the)g(en)m +(vironment;)d(the)k(idea)f(is)h(that)f(if)h(you)e(do)h(not)f(kno)n(w)h +(your)396 3001 y(en)m(vironment)d(you)j(can)g(refer)f(to)h(other)g +(entities)g(by)g(denoting)e(URLs)k(for)d(them.)h(I)g(think)g(that)g +(this)h(interpretation)d(of)396 3109 y Fq(SYSTEM)i Fv(names)g(may)g(ha) +n(v)o(e)f(some)h(applications)f(in)i(the)f(internet,)f(b)n(ut)h(it)h +(is)g(not)f(the)g(\002rst)h(choice)f(in)g(general.)396 +3217 y(Because)h(of)f(this,)g(the)g(resolv)o(er)f(is)i(a)g(separate)f +(module)e(of)i(the)h(parser)e(that)h(can)g(be)h(e)o(xchanged)c(by)j +(another)f(one)g(if)396 3325 y(necessary;)h(more)f(precisely)-5 +b(,)19 b(the)h(parser)g(already)f(de\002nes)h(se)n(v)o(eral)f(resolv)o +(ers.)396 3475 y(The)h(follo)n(wing)f(resolv)o(ers)g(do)h(already)f(e)o +(xist:)p Black 396 3707 a Ft(\225)p Black 60 w Fv(Resolv)o(ers)h +(reading)f(from)g(arbitrary)g(input)g(channels.)g(These)h(can)g(be)g +(con\002gured)e(such)i(that)g(a)h(certain)f(ID)g(is)479 +3815 y(associated)g(with)h(the)f(channel;)f(in)h(this)h(case)g(inner)e +(references)g(to)h(e)o(xternal)f(entities)i(can)f(be)g(resolv)o(ed.)e +(There)i(is)479 3923 y(also)h(a)f(special)h(resolv)o(er)e(that)h +(interprets)f(SYSTEM)i(IDs)f(as)h(URLs;)g(this)g(resolv)o(er)e(can)h +(process)g(relati)n(v)o(e)479 4031 y(SYSTEM)h(names)e(and)h(determine)f +(the)h(corresponding)d(absolute)i(URL.)p Black 396 4139 +a Ft(\225)p Black 60 w Fv(A)i(resolv)o(er)e(that)h(reads)g(al)o(w)o +(ays)h(from)e(a)i(gi)n(v)o(en)d(O'Caml)j(string.)e(This)i(resolv)o(er)e +(is)i(not)f(able)g(to)g(resolv)o(e)f(further)479 4247 +y(names)h(unless)g(the)h(string)f(is)h(not)f(associated)g(with)g(an)o +(y)f(name,)h(i.e.)g(if)g(the)g(document)f(contained)f(in)j(the)f +(string)479 4355 
y(refers)g(to)g(an)g(e)o(xternal)f(entity)-5 +b(,)20 b(this)g(reference)f(cannot)g(be)h(follo)n(wed)f(in)h(this)h +(case.)p Black 396 4463 a Ft(\225)p Black 60 w Fv(A)g(resolv)o(er)e +(for)g(\002le)i(names.)f(The)g Fq(SYSTEM)g Fv(name)f(is)i(interpreted)e +(as)i(\002le)f(URL)h(with)g(the)f(slash)h("/")f(as)h(separator)479 +4571 y(for)f(directories.)f(-)h(This)h(resolv)o(er)d(is)k(deri)n(v)o +(ed)c(from)h(the)h(generic)f(URL)i(resolv)o(er)-5 b(.)396 +4720 y(The)20 b(interf)o(ace)f(a)i(resolv)o(er)e(must)h(ha)n(v)o(e)g +(is)h(documented,)c(so)k(it)g(is)g(possible)f(to)g(write)g(your)f(o)n +(wn)h(resolv)o(er)-5 b(.)19 b(F)o(or)396 4828 y(e)o(xample,)g(you)g +(could)g(connect)g(the)h(parser)g(with)g(an)h(HTTP)f(client,)g(and)f +(resolv)o(e)h(URLs)h(of)f(the)g(HTTP)g(namespace.)p Black +3800 5278 a Fr(74)p Black eop +%%Page: 75 75 +75 74 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(resolv)o(er)f +(classes)i(support)e(that)h(se)n(v)o(eral)g(independent)e(resolv)o(ers) +h(are)h(combined)e(to)i(one)g(more)f(po)n(werful)396 +687 y(resolv)o(er;)g(thus)h(it)h(is)g(possible)f(to)h(combine)d(a)j +(self-written)e(resolv)o(er)g(with)i(the)f(already)f(e)o(xisting)g +(resolv)o(ers.)396 836 y(Note)h(that)h(the)f(e)o(xisting)f(resolv)o +(ers)h(only)f(interpret)g Fq(SYSTEM)h Fv(names,)f(not)h +Fq(PUBLIC)g Fv(names.)g(If)g(it)h(helps)f(you,)f(it)h(is)396 +944 y(possible)g(to)f(de\002ne)h(resolv)o(ers)e(for)h +Fq(PUBLIC)h Fv(names,)f(too;)g(for)g(e)o(xample,)f(such)i(a)g(resolv)o +(er)e(could)h(look)g(up)g(the)h(public)396 1052 y(name)g(in)g(a)h(hash) +f(table,)g(and)f(map)h(it)h(to)f(a)h(system)f(name)g(which)g(is)h +(passed)f(o)o(v)o(er)f(to)h(the)g(e)o(xisting)g(resolv)o(er)e(for)396 +1160 y(system)j(names.)e(It)i(is)g(relati)n(v)o(ely)e(simple)h(to)g +(pro)o(vide)f(such)g(a)i(resolv)o(er)-5 b(.)-2 1579 y +Fx(4.2.)39 b(Resolver)n(s)e(and)i(sour)m(ces)-2 1907 +y 
Fp(4.2.1.)c(Using)f(the)g(b)n(uilt-in)f(resolver)n(s)i(\(called)g +(sour)n(ces\))396 2075 y Fv(The)20 b(type)g Fq(source)g +Fv(enumerates)e(the)j(tw)o(o)f(possibilities)h(where)e(the)h(document)f +(to)h(parse)g(comes)g(from.)396 2255 y Fq(type)44 b(source)g(=)576 +2352 y(Entity)f(of)i(\(\(dtd)f(-)p Fo(>)g Fq(Pxp_entity.entity\))e(*)j +(Pxp_reader.resolver\))486 2449 y(|)g(ExtID)f(of)g(\(ext_id)g(*)g +(Pxp_reader.resolver\))396 2640 y Fv(Y)-9 b(ou)20 b(normally)e(need)i +(not)g(to)g(w)o(orry)f(about)h(this)g(type)g(as)h(there)f(are)g(con)m +(v)o(enience)d(functions)i(that)h(create)g Fq(source)396 +2748 y Fv(v)n(alues:)p Black 396 3105 a Ft(\225)p Black +60 w Fq(from_file)44 b(s)p Fv(:)20 b(The)g(document)e(is)j(read)f(from) +f(\002le)i Fq(s)p Fv(;)g(you)e(may)h(specify)f(absolute)h(or)g(relati)n +(v)o(e)f(path)h(names.)479 3213 y(The)g(\002le)h(name)f(must)g(be)g +(encoded)e(as)j(UTF-8)f(string.)479 3362 y(There)g(is)h(an)f(optional)f +(ar)o(gument)f Fq(~system_encoding)g Fv(specifying)g(the)j(character)d +(encoding)h(which)g(is)i(used)479 3470 y(for)f(the)g(names)g(of)g(the)g +(\002le)h(system.)f(F)o(or)g(e)o(xample,)e(if)j(this)g(encoding)d(is)j +(ISO-8859-1)c(and)j Fq(s)g Fv(is)i(also)e(a)479 3578 +y(ISO-8859-1)e(string,)h(you)h(can)g(form)f(the)h(source:)479 +3717 y Fq(let)45 b(s_utf8)88 b(=)i(recode_string)42 b +(~in_enc:`Enc_iso88591)g(~out_enc:`Enc_utf8)g(s)i(in)479 +3814 y(from_file)g(~system_encoding:`Enc_iso88591)39 +b(s_utf8)479 4005 y Fv(This)21 b Fq(source)e Fv(has)i(the)f(adv)n +(antage)e(that)j(it)f(is)i(able)e(to)g(resolv)o(e)f(inner)h(e)o +(xternal)f(entities;)h(i.e.)g(if)h(your)e(document)479 +4113 y(includes)g(data)g(from)g(another)f(\002le)i(\(using)f(the)g +Fq(SYSTEM)g Fv(attrib)n(ute\),)g(this)g(mode)g(will)h(\002nd)f(that)h +(\002le.)g(Ho)n(we)n(v)o(er)m(,)d(this)479 4221 y(mode)j(cannot)f +(resolv)o(e)g Fq(PUBLIC)h Fv(identi\002ers)f(nor)h Fq(SYSTEM)g +Fv(identi\002ers)f(other)h(than)g("\002le:".)p Black +396 4370 a Ft(\225)p Black 60 w 
Fq(from_channel)43 b(ch)p +Fv(:)21 b(The)e(document)g(is)i(read)e(from)h(the)g(channel)f +Fq(ch)p Fv(.)h(In)g(general,)f(this)h(source)g(also)g(supports)479 +4478 y(\002le)h(URLs)g(found)e(in)h(the)g(document;)f(ho)n(we)n(v)o(er) +m(,)e(by)j(def)o(ault)f(only)h(absolute)f(URLs)i(are)f(understood.)e +(It)i(is)479 4586 y(possible)g(to)h(associate)f(an)g(ID)g(with)h(the)f +(channel)f(such)h(that)g(the)g(resolv)o(er)f(kno)n(ws)h(ho)n(w)f(to)i +(interpret)e(relati)n(v)o(e)479 4694 y(URLs:)479 4832 +y Fq(from_channel)43 b(~id:\(System)g("file:///dir/dir1/"\))f(ch)p +Black 3800 5278 a Fr(75)p Black eop +%%Page: 76 76 +76 75 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(There)g(is)h(also)f +(the)g(~system_encoding)e(ar)o(gument)f(specifying)i(ho)n(w)h(\002le)h +(names)e(are)i(encoded.)d(-)i(The)g(e)o(xample)479 687 +y(from)f(abo)o(v)o(e)g(can)h(also)g(be)h(written)f(\(b)n(ut)f(it)i(is)g +(no)f(longer)f(possible)h(to)g(interpret)f(relati)n(v)o(e)h(URLs)h +(because)e(there)h(is)479 795 y(no)g(~id)g(ar)o(gument,)e(and)i +(computing)d(this)k(ar)o(gument)d(is)j(relati)n(v)o(ely)e(complicated)g +(because)g(it)i(must)f(be)h(a)f(v)n(alid)479 903 y(URL\):)479 +1041 y Fq(let)45 b(ch)f(=)h(open_in)e(s)i(in)479 1138 +y(let)g(src)f(=)h(from_channel)d(~system_encoding:`Enc_iso88591)e(ch)45 +b(in)479 1236 y(...;)479 1333 y(close_in)f(ch)p Black +396 1482 a Ft(\225)p Black 60 w Fq(from_string)f(s)p +Fv(:)21 b(The)f(string)g Fq(s)g Fv(is)h(the)g(document)d(to)i(parse.)g +(This)g(mode)f(is)j(not)d(able)h(to)h(interpret)e(\002le)i(names)479 +1590 y(of)f Fq(SYSTEM)g Fv(clauses,)g(nor)g(it)h(can)f(look)f(up)h +Fq(PUBLIC)f Fv(identi\002ers.)479 1740 y(Normally)-5 +b(,)19 b(the)h(encoding)e(of)i(the)g(string)g(is)h(detected)e(as)i +(usual)f(by)g(analyzing)f(the)h(XML)g(declaration,)e(if)j(an)o(y)-5 +b(.)479 1847 y(Ho)n(we)n(v)o(er)m(,)18 b(it)j(is)g(also)g(possible)f +(to)g(specify)g(the)g(encoding)e(directly:)479 1986 y 
+Fq(let)45 b(src)f(=)h(from_string)e(~fixenc:`ISO-8859-2)e(s)p +Black 396 2177 a Ft(\225)p Black 60 w Fq(ExtID)j(\(id,)g(r\))p +Fv(:)21 b(The)f(document)e(to)i(parse)g(is)h(denoted)e(by)h(the)g +(identi\002er)g Fq(id)g Fv(\(either)f(a)i Fq(SYSTEM)f +Fv(or)g Fq(PUBLIC)479 2285 y Fv(clause\),)g(and)g(this)g(identi\002er)g +(is)h(interpreted)d(by)i(the)g(resolv)o(er)f Fq(r)p Fv(.)i(Use)f(this)h +(mode)e(if)i(you)e(ha)n(v)o(e)h(written)g(your)f(o)n(wn)479 +2393 y(resolv)o(er)-5 b(.)479 2542 y(Which)20 b(character)f(sets)j(are) +e(possible)g(depends)e(on)i(the)g(passed)h(resolv)o(er)d +Fq(r)p Fv(.)p Black 396 2692 a Ft(\225)p Black 60 w Fq(Entity)44 +b(\(get_entity,)f(r\))p Fv(:)20 b(The)g(document)e(to)j(parse)f(is)h +(returned)d(by)i(the)g(function)f(in)m(v)n(ocation)479 +2800 y Fq(get_entity)43 b(dtd)p Fv(,)20 b(where)g Fq(dtd)g +Fv(is)h(the)g(DTD)f(object)g(to)g(use)g(\(it)h(may)f(be)g(empty\).)f +(Inner)f(e)o(xternal)h(references)479 2908 y(occuring)g(in)h(this)h +(entity)e(are)i(resolv)o(ed)d(using)i(the)g(resolv)o(er)f +Fq(r)p Fv(.)479 3057 y(Which)h(character)f(sets)j(are)e(possible)g +(depends)e(on)i(the)g(passed)h(resolv)o(er)d Fq(r)p Fv(.)-2 +3510 y Fp(4.2.2.)35 b(The)f(resolver)g(API)396 3677 y +Fv(A)21 b(resolv)o(er)e(is)i(an)f(object)g(that)g(can)g(be)g(opened)e +(lik)o(e)j(a)f(\002le,)h(b)n(ut)f(you)f(do)h(not)g(pass)g(the)h(\002le) +f(name)g(to)g(the)g(resolv)o(er)m(,)f(b)n(ut)396 3785 +y(the)h(XML)h(identi\002er)e(of)h(the)g(entity)g(to)h(read)e(from)g +(\(either)h(a)g Fq(SYSTEM)g Fv(or)g Fq(PUBLIC)g Fv(clause\).)f(When)h +(opened,)f(the)396 3893 y(resolv)o(er)g(must)h(return)f(the)i +Fq(Lexing.lexbuf)d Fv(that)i(reads)g(the)h(characters.)e(The)g(resolv)o +(er)g(can)h(be)h(closed,)e(and)h(it)396 4001 y(can)g(be)g(cloned.)f +(Furthermore,)f(it)j(is)g(possible)f(to)g(tell)h(the)f(resolv)o(er)f +(which)h(character)f(set)i(it)g(should)e(assume.)h(-)g(The)396 +4109 y(follo)n(wing)f(from)g(Pxp_reader:)396 4289 y Fq(exception)44 +b(Not_competent)396 4386 
y(exception)g(Not_resolvable)e(of)j(exn)396 +4581 y(class)f(type)g(resolver)g(=)486 4678 y(object)576 +4775 y(method)f(init_rep_encoding)f(:)j(rep_encoding)e(->)h(unit)576 +4872 y(method)f(init_warner)g(:)i(collect_warnings)d(->)j(unit)p +Black 3798 5278 a Fr(76)p Black eop +%%Page: 77 77 +77 76 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43 +b(rep_encoding)g(:)i(rep_encoding)576 676 y(method)e(open_in)h(:)h +(ext_id)f(->)g(Lexing.lexbuf)576 773 y(method)f(close_in)h(:)h(unit)576 +870 y(method)e(change_encoding)g(:)h(string)g(->)h(unit)576 +967 y(method)e(clone)h(:)h(resolver)576 1065 y(method)e(close_all)h(:)g +(unit)486 1162 y(end)396 1353 y Fv(The)20 b(resolv)o(er)f(object)h +(must)g(w)o(ork)f(as)i(follo)n(ws:)p Black 396 1627 a +Ft(\225)p Black 60 w Fv(When)f(the)h(parser)e(is)i(called,)f(it)h +(tells)g(the)f(resolv)o(er)f(the)h(w)o(arner)g(object)f(and)h(the)g +(internal)g(encoding)e(by)i(in)m(v)n(oking)479 1735 y +Fq(init_warner)f Fv(and)h Fq(init_rep_encoding)p Fv(.)d(The)j(resolv)o +(er)f(should)g(store)i(these)f(v)n(alues.)f(The)h(method)479 +1843 y Fq(rep_encoding)f Fv(should)g(return)g(the)h(internal)g +(encoding.)p Black 396 1950 a Ft(\225)p Black 60 w Fv(If)g(the)h +(parser)e(w)o(ants)i(to)f(read)g(from)f(the)h(resolv)o(er)m(,)e(it)j +(in)m(v)n(ok)o(es)f(the)g(method)f Fq(open_in)p Fv(.)g(Either)h(the)g +(resolv)o(er)479 2058 y(succeeds,)g(in)g(which)g(case)g(the)h +Fq(Lexing.lexbuf)d Fv(reading)h(from)g(the)h(\002le)h(or)f(stream)g +(must)g(be)h(returned,)d(or)479 2166 y(opening)h(f)o(ails.)h(In)g(the)g +(latter)h(case)f(the)h(method)d(implementation)g(should)h(raise)i(an)f +(e)o(xception)e(\(see)j(belo)n(w\).)p Black 396 2274 +a Ft(\225)p Black 60 w Fv(If)f(the)h(parser)e(\002nishes)i(reading,)d +(it)j(calls)g(the)f Fq(close_in)g Fv(method.)p Black +396 2382 a Ft(\225)p Black 60 w Fv(If)g(the)h(parser)e(\002nds)h(a)h 
+(reference)d(to)j(another)e(e)o(xternal)f(entity)i(in)h(the)f(input)f +(stream,)h(it)h(calls)g Fq(clone)f Fv(to)g(get)h(a)479 +2490 y(second)f(resolv)o(er)f(which)g(must)h(be)h(initially)f(closed)g +(\(not)f(yet)h(connected)f(with)h(an)g(input)f(stream\).)h(The)g +(parser)479 2598 y(then)g(in)m(v)n(ok)o(es)f Fq(open_in)h +Fv(and)f(the)i(other)e(methods)g(as)i(described.)p Black +396 2706 a Ft(\225)p Black 60 w Fv(If)f(you)g(already)f(kno)n(w)g(the)h +(character)f(set)i(of)f(the)g(input)g(stream,)f(you)h(should)f(recode)g +(it)i(to)f(the)g(internal)479 2814 y(encoding,)e(and)i(de\002ne)f(the)i +(method)d Fq(change_encoding)h Fv(as)i(an)f(empty)f(method.)p +Black 396 2922 a Ft(\225)p Black 60 w Fv(If)h(you)g(w)o(ant)g(to)g +(support)f(multiple)h(e)o(xternal)f(character)g(sets,)i(the)f(object)f +(must)i(follo)n(w)e(a)i(much)e(more)479 3030 y(complicated)g(protocol.) +f(Directly)i(after)g Fq(open_in)f Fv(has)i(been)e(called,)h(the)g +(resolv)o(er)f(must)h(return)f(a)i(le)o(xical)f(b)n(uf)n(fer)479 +3138 y(that)h(only)e(reads)h(one)g(byte)f(at)i(a)g(time.)f(This)g(is)h +(only)f(possible)f(if)i(you)e(create)h(the)g(le)o(xical)g(b)n(uf)n(fer) +f(with)479 3246 y Fq(Lexing.from_function)p Fv(;)e(the)j(function)d +(must)j(then)f(al)o(w)o(ays)h(return)e(1)i(if)f(the)h(EOF)g(is)g(not)f +(yet)h(reached,)e(and)h(0)479 3354 y(if)i(EOF)f(is)h(reached.)e(If)h +(the)g(parser)g(has)g(read)g(the)g(\002rst)h(line)f(of)g(the)h +(document,)c(it)k(will)g(in)m(v)n(ok)o(e)479 3461 y Fq(change_encoding) +e Fv(to)h(tell)h(the)f(resolv)o(er)f(which)h(character)e(set)j(to)g +(assume.)f(From)f(this)i(moment,)e(the)h(object)479 3569 +y(can)g(return)f(more)h(than)f(one)h(byte)g(at)g(once.)g(The)g(ar)o +(gument)d(of)j Fq(change_encoding)f Fv(is)i(either)e(the)i(parameter)d +(of)479 3677 y(the)i("encoding")e(attrib)n(ute)i(of)g(the)g(XML)h +(declaration,)d(or)i(the)g(empty)f(string)h(if)h(there)e(is)j(not)d(an) +o(y)h(XML)479 3785 y(declaration)f(or)h(if)g(the)h(declaration)d(does)i 
+(not)g(contain)f(an)h(encoding)e(attrib)n(ute.)479 3935 +y(At)j(the)f(be)o(ginning)e(the)i(resolv)o(er)f(must)h(only)g(return)f +(one)g(character)g(e)n(v)o(ery)g(time)h(something)f(is)i(read)f(from)f +(the)479 4043 y(le)o(xical)h(b)n(uf)n(fer)-5 b(.)19 b(The)h(reason)f +(for)h(this)h(is)g(that)f(you)f(otherwise)h(w)o(ould)f(not)h(e)o +(xactly)g(kno)n(w)f(at)h(which)g(position)f(in)479 4151 +y(the)h(input)g(stream)g(the)g(character)f(set)i(changes.)479 +4300 y(If)f(you)g(w)o(ant)g(automatic)f(recognition)f(of)i(the)g +(character)f(set,)i(it)g(is)g(up)f(to)g(the)g(resolv)o(er)f(object)h +(to)g(implement)f(this.)p Black 396 4449 a Ft(\225)p +Black 60 w Fv(If)h(an)g(error)g(occurs,)f(the)h(parser)g(calls)g(the)h +(method)d Fq(close_all)i Fv(for)f(the)h(top-le)n(v)o(el)f(resolv)o(er;) +g(this)i(method)479 4557 y(should)e(close)i(itself)g(\(if)f(not)g +(already)f(done\))f(and)i(all)h(clones.)396 4748 y Fu(Exceptions.)f +Fv(It)h(is)g(possible)f(to)g(chain)g(resolv)o(ers)f(such)h(that)g(when) +g(the)g(\002rst)h(resolv)o(er)e(is)i(not)f(able)g(to)g(open)f(the)396 +4856 y(entity)-5 b(,)20 b(the)g(other)f(resolv)o(ers)g(of)h(the)g +(chain)g(are)g(tried)g(in)g(turn.)g(The)g(method)e Fq(open_in)i +Fv(should)f(raise)i(the)f(e)o(xception)p Black 3797 5278 +a Fr(77)p Black eop +%%Page: 78 78 +78 77 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fq(Not_competent)f +Fv(to)h(indicate)g(that)g(the)g(ne)o(xt)g(resolv)o(er)f(should)g(try)h +(to)g(open)f(the)i(entity)-5 b(.)19 b(If)h(the)g(resolv)o(er)f(is)i +(able)f(to)396 687 y(handle)f(the)i(ID,)f(b)n(ut)g(some)g(other)f +(error)g(occurs,)g(the)i(e)o(xception)d Fq(Not_resolvable)g +Fv(should)i(be)g(raised)g(to)g(force)396 795 y(that)h(the)f(chain)f +(breaks.)396 944 y(Example:)g(Ho)n(w)h(to)h(de\002ne)e(a)i(resolv)o(er) +e(that)h(is)h(equi)n(v)n(alent)e(to)h(from_string:)e(...)-2 +1314 y Fp(4.2.3.)35 b(Prede\002ned)f(resolver)h(components)396 +1482 y Fv(There)20 
b(are)g(some)g(classes)h(in)f(Pxp_reader)e(that)j +(de\002ne)e(common)g(resolv)o(er)f(beha)n(viour)-5 b(.)396 +1662 y Fq(class)44 b(resolve_read_this_channel)d(:)576 +1759 y(?id:ext_id)i(->)576 1856 y(?fixenc:encoding)f(->)576 +1953 y(?auto_close:bool)g(->)576 2050 y(in_channel)h(->)755 +2147 y(resolver)396 2338 y Fv(Reads)21 b(from)e(the)h(passed)g(channel) +f(\(it)i(may)f(be)g(e)n(v)o(en)f(a)i(pipe\).)e(If)h(the)g +Fq(~id)g Fv(ar)o(gument)e(is)j(passed)f(to)h(the)f(object,)f(the)396 +2446 y(created)h(resolv)o(er)f(accepts)h(only)f(this)i(ID.)f(Otherwise) +g(all)h(IDs)f(are)g(accepted.)f(-)i(Once)f(the)g(resolv)o(er)f(has)h +(been)396 2554 y(cloned,)f(it)h(does)g(not)f(accept)h(an)o(y)f(ID.)g +(This)h(means)g(that)g(this)g(resolv)o(er)e(cannot)h(handle)g(inner)g +(references)f(to)i(e)o(xternal)396 2662 y(entities.)h(Note)f(that)g +(you)f(can)h(combine)f(this)i(resolv)o(er)e(with)h(another)f(resolv)o +(er)g(that)h(can)g(handle)f(inner)g(references)396 2770 +y(\(such)h(as)h(resolv)o(e_as_\002le\);)d(see)j(class)g('combine')d +(belo)n(w)-5 b(.)19 b(-)h(If)g(you)g(pass)g(the)h Fq(~fixenc)e +Fv(ar)o(gument,)f(the)i(encoding)396 2878 y(of)g(the)g(channel)f(is)i +(set)g(to)g(the)f(passed)g(v)n(alue,)f(re)o(gardless)g(of)h(an)o(y)f +(auto-recognition)e(or)j(an)o(y)f(XML)h(declaration.)f(-)h(If)396 +2986 y Fq(~auto_close)43 b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o +(ault\),)e(the)h(channel)f(is)i(closed)f(after)g(use.)g(If)g +Fq(~auto_close)43 b(=)396 3094 y(false)p Fv(,)20 b(the)g(channel)f(is)i +(left)g(open.)396 3315 y Fq(class)44 b(resolve_read_any_channel)d(:)576 +3413 y(?auto_close:bool)h(->)576 3510 y(channel_of_id:\(ext_id)f(->)j +(\(in_channel)f(*)i(encoding)f(option\)\))f(->)755 3607 +y(resolver)396 3798 y Fv(This)21 b(resolv)o(er)e(calls)h(the)h +(function)d Fq(~channel_of_id)h Fv(to)h(open)f(a)i(ne)n(w)f(channel)f +(for)g(the)h(passed)g Fq(ext_id)p Fv(.)g(This)396 3906 +y(function)f(must)h(either)g(return)f(the)h(channel)f(and)h(the)g 
+(encoding,)e(or)i(it)g(must)h(f)o(ail)f(with)h(Not_competent.)c(The)396 +4014 y(function)i(must)h(return)f Fq(None)h Fv(as)h(encoding)d(if)j +(the)f(def)o(ault)f(mechanism)g(to)h(recognize)f(the)h(encoding)e +(should)h(be)396 4122 y(used.)g(It)i(must)e(return)g +Fq(Some)44 b(e)20 b Fv(if)g(it)h(is)f(already)f(kno)n(wn)f(that)i(the)g +(encoding)d(of)j(the)f(channel)g(is)i Fq(e)p Fv(.)e(If)h +Fq(~auto_close)396 4230 y(=)45 b(true)19 b Fv(\(which)g(is)h(the)f(def) +o(ault\),)f(the)i(channel)e(is)i(closed)f(after)g(use.)h(If)f +Fq(~auto_close)43 b(=)h(false)p Fv(,)19 b(the)h(channel)e(is)396 +4337 y(left)j(open.)396 4559 y Fq(class)44 b(resolve_read_url_channel)d +(:)576 4656 y(?base_url:Neturl.url)g(->)576 4753 y(?auto_close:bool)h +(->)576 4851 y(url_of_id:\(ext_id)g(->)i(Neturl.url\))f(->)p +Black 3800 5278 a Fr(78)p Black eop +%%Page: 79 79 +79 78 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 576 579 a Fq +(channel_of_url:\(Neturl.url)40 b(->)45 b(\(in_channel)e(*)h(encoding)g +(option\)\))f(->)755 676 y(resolver)396 867 y Fv(When)20 +b(this)h(resolv)o(er)e(gets)h(an)h(ID)f(to)g(read)g(from,)f(it)i(calls) +g(the)f(function)e Fq(~url_of_id)h Fv(to)i(get)f(the)g(corresponding) +396 975 y(URL.)h(This)f(URL)h(may)f(be)g(a)g(relati)n(v)o(e)g(URL;)h +(ho)n(we)n(v)o(er)m(,)c(a)k(URL)g(scheme)f(must)g(be)g(used)g(which)f +(contains)h(a)h(path.)396 1083 y(The)f(resolv)o(er)f(con)m(v)o(erts)g +(the)h(URL)h(to)f(an)g(absolute)f(URL)i(if)g(necessary)-5 +b(.)19 b(The)g(second)h(function,)396 1191 y Fq(~channel_of_url)p +Fv(,)e(is)j(fed)f(with)h(the)f(absolute)f(URL)i(as)g(input.)e(This)h +(function)f(opens)g(the)i(resource)e(to)h(read)396 1299 +y(from,)f(and)h(returns)f(the)h(channel)f(and)h(the)g(encoding)e(of)i +(the)g(resource.)396 1448 y(Both)g(functions,)f Fq(~url_of_id)g +Fv(and)h Fq(~channel_of_url)p Fv(,)e(can)i(raise)g(Not_competent)e(to)i +(indicate)g(that)g(the)396 1556 y(object)g(is)h(not)f(able)g(to)g(read) 
+g(from)f(the)h(speci\002ed)g(resource.)f(Ho)n(we)n(v)o(er)m(,)f(there)i +(is)h(a)f(dif)n(ference:)f(A)h(Not_competent)396 1664 +y(from)f Fq(~url_of_id)g Fv(is)j(left)e(as)h(it)g(is,)g(b)n(ut)f(a)h +(Not_competent)c(from)i Fq(~channel_of_url)g Fv(is)i(con)m(v)o(erted)c +(to)396 1772 y(Not_resolv)n(able.)h(So)i(only)g Fq(~url_of_id)f +Fv(decides)h(which)f(URLs)i(are)f(accepted)g(by)f(the)i(resolv)o(er)e +(and)g(which)h(not.)396 1921 y(The)g(function)f Fq(~channel_of_url)f +Fv(must)i(return)f Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o(ault)f +(mechanism)g(to)i(recognize)396 2029 y(the)f(encoding)f(should)g(be)h +(used.)g(It)g(must)g(return)f Fq(Some)44 b(e)21 b Fv(if)g(it)f(is)i +(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i(the)396 +2137 y(channel)f(is)i Fq(e)p Fv(.)396 2287 y(If)f Fq(~auto_close)43 +b(=)i(true)20 b Fv(\(which)f(is)i(the)g(def)o(ault\),)e(the)h(channel)f +(is)i(closed)f(after)g(use.)g(If)g Fq(~auto_close)43 +b(=)396 2395 y(false)p Fv(,)20 b(the)g(channel)f(is)i(left)g(open.)396 +2544 y(Objects)f(of)g(this)g(class)h(contain)e(a)h(base)g(URL)g(relati) +n(v)o(e)f(to)h(which)g(relati)n(v)o(e)f(URLs)h(are)g(interpreted.)e +(When)i(creating)e(a)396 2652 y(ne)n(w)i(object,)g(you)f(can)h(specify) +f(the)i(base)f(URL)h(by)f(passing)f(it)i(as)g Fq(~base_url)e +Fv(ar)o(gument.)f(When)i(an)g(e)o(xisting)396 2760 y(object)g(is)h +(cloned,)e(the)h(base)g(URL)h(of)f(the)g(clone)g(is)h(the)f(URL)h(of)f +(the)g(original)f(object.)h(-)g(Note)g(that)g(the)h(term)f("base)396 +2868 y(URL")h(has)f(a)h(strict)g(de\002nition)e(in)h(RFC)i(1808.)396 +3089 y Fq(class)44 b(resolve_read_this_string)d(:)576 +3187 y(?id:ext_id)i(->)576 3284 y(?fixenc:encoding)f(->)576 +3381 y(string)h(->)755 3478 y(resolver)396 3669 y Fv(Reads)21 +b(from)e(the)h(passed)g(string.)g(If)g(the)g Fq(~id)h +Fv(ar)o(gument)c(is)k(passed)g(to)f(the)g(object,)g(the)g(created)f +(resolv)o(er)g(accepts)396 3777 y(only)h(this)g(ID.)g(Otherwise)g(all)h 
+(IDs)g(are)f(accepted.)f(-)h(Once)g(the)g(resolv)o(er)f(has)i(been)e +(cloned,)g(it)i(does)f(not)g(accept)g(an)o(y)396 3885 +y(ID.)g(This)h(means)f(that)g(this)h(resolv)o(er)e(cannot)g(handle)g +(inner)g(references)g(to)h(e)o(xternal)f(entities.)i(Note)f(that)g(you) +f(can)396 3993 y(combine)g(this)i(resolv)o(er)e(with)h(another)f +(resolv)o(er)g(that)h(can)g(handle)f(inner)g(references)g(\(such)h(as)h +(resolv)o(e_as_\002le\);)396 4101 y(see)g(class)g('combine')d(belo)n(w) +-5 b(.)19 b(-)i(If)f(you)f(pass)i(the)f Fq(~fixenc)f +Fv(ar)o(gument,)f(the)i(encoding)e(of)i(the)g(string)g(is)h(set)g(to)g +(the)396 4209 y(passed)f(v)n(alue,)g(re)o(gardless)e(of)i(an)o(y)g +(auto-recognition)c(or)k(an)o(y)f(XML)i(declaration.)396 +4430 y Fq(class)44 b(resolve_read_any_string)d(:)576 +4527 y(string_of_id:\(ext_id)g(->)k(\(string)e(*)i(encoding)e +(option\)\))h(->)755 4625 y(resolver)p Black 3800 5278 +a Fr(79)p Black eop +%%Page: 80 80 +80 79 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(This)h(resolv)o(er)e +(calls)h(the)h(function)d Fq(~string_of_id)h Fv(to)h(get)g(the)g +(string)g(for)g(the)g(passed)g Fq(ext_id)p Fv(.)g(This)g(function)396 +687 y(must)g(either)g(return)f(the)i(string)e(and)h(the)g(encoding,)e +(or)i(it)h(must)f(f)o(ail)h(with)f(Not_competent.)e(The)h(function)g +(must)396 795 y(return)g Fq(None)h Fv(as)h(encoding)d(if)j(the)f(def)o +(ault)g(mechanism)e(to)j(recognize)d(the)i(encoding)f(should)g(be)h +(used.)g(It)g(must)396 903 y(return)f Fq(Some)44 b(e)21 +b Fv(if)g(it)f(is)i(already)d(kno)n(wn)f(that)j(the)f(encoding)e(of)i +(the)g(string)g(is)h Fq(e)p Fv(.)396 1124 y Fq(class)44 +b(resolve_as_file)f(:)576 1222 y(?file_prefix:[)f(`Not_recognized)g(|)j +(`Allowed)f(|)g(`Required)g(])g(->)576 1319 y(?host_prefix:[)e +(`Not_recognized)g(|)j(`Allowed)f(|)g(`Required)g(])g(->)576 +1416 y(?system_encoding:encoding)c(->)576 1513 y(?url_of_id:\(ext_id)h +(->)k(Neturl.url\))e(->)576 1610 
y(?channel_of_url:)f(\(Neturl.url)h +(->)h(\(in_channel)f(*)i(encoding)e(option\)\))h(->)576 +1707 y(unit)g(->)755 1804 y(resolver)396 1995 y Fv(Reads)21 +b(from)e(the)h(local)g(\002le)h(system.)f(Ev)o(ery)f(\002le)i(name)f +(is)h(interpreted)d(as)j(\002le)g(name)f(of)f(the)i(local)f(\002le)h +(system,)f(and)396 2103 y(the)g(referred)f(\002le)i(is)g(read.)396 +2253 y(The)f(full)g(form)f(of)h(a)h(\002le)g(URL)g(is:)g +(\002le://host/path,)e(where)h('host')f(speci\002es)i(the)f(host)g +(system)g(where)g(the)g(\002le)396 2361 y(identi\002ed)g('path')f +(resides.)h(host)g(=)g("")h(or)f(host)g(=)h("localhost")e(are)h +(accepted;)f(other)h(v)n(alues)f(will)i(raise)396 2468 +y(Not_competent.)d(The)i(standard)f(for)g(\002le)i(URLs)g(is)g +(de\002ned)e(in)i(RFC)g(1738.)396 2618 y(Option)f Fq(~file_prefix)p +Fv(:)e(Speci\002es)j(ho)n(w)f(the)g("\002le:")h(pre\002x)e(of)h(\002le) +h(names)f(is)h(handled:)p Black 396 2850 a Ft(\225)p +Black 60 w Fq(`Not_recognized:)p Fv(The)c(pre\002x)j(is)h(not)f +(recognized.)p Black 396 2958 a Ft(\225)p Black 60 w +Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h(allo)n(wed)e(b)n(ut)i(not)f +(required)e(\(the)i(def)o(ault\).)p Black 396 3066 a +Ft(\225)p Black 60 w Fq(`Required:)f Fv(The)h(pre\002x)g(is)h +(required.)396 3257 y(Option)f Fq(~host_prefix:)e Fv(Speci\002es)j(ho)n +(w)e(the)i("//host")f(phrase)f(of)h(\002le)h(names)f(is)h(handled:)p +Black 396 3490 a Ft(\225)p Black 60 w Fq(`Not_recognized:)p +Fv(The)c(pre\002x)j(is)h(not)f(recognized.)p Black 396 +3598 a Ft(\225)p Black 60 w Fq(`Allowed:)g Fv(The)f(pre\002x)h(is)h +(allo)n(wed)e(b)n(ut)i(not)f(required)e(\(the)i(def)o(ault\).)p +Black 396 3706 a Ft(\225)p Black 60 w Fq(`Required:)f +Fv(The)h(pre\002x)g(is)h(required.)396 3896 y(Option)f +Fq(~system_encoding:)e Fv(Speci\002es)i(the)g(encoding)e(of)i(\002le)h +(names)f(of)g(the)g(local)g(\002le)h(system.)f(Def)o(ault:)396 +4004 y(UTF-8.)396 4154 y(Options)g Fq(~url_of_id)p Fv(,)f +Fq(~channel_of_url)p 
Fv(:)f(Not)i(for)g(the)g(casual)g(user!)396 +4376 y Fq(class)44 b(combine)g(:)576 4473 y(?prefer:resolver)e(->)576 +4570 y(resolver)h(list)h(->)755 4667 y(resolver)p Black +3800 5278 a Fr(80)p Black eop +%%Page: 81 81 +81 80 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Combines)g(se)n(v)o +(eral)f(resolv)o(er)g(objects.)h(If)g(a)h(concrete)e(entity)g(with)i +(an)f Fq(ext_id)g Fv(is)h(to)f(be)g(opened,)f(the)h(combined)396 +687 y(resolv)o(er)f(tries)i(the)f(contained)f(resolv)o(ers)g(in)h(turn) +g(until)g(a)g(resolv)o(er)f(accepts)h(opening)f(the)h(entity)g(\(i.e.)g +(it)g(does)g(not)396 795 y(raise)h(Not_competent)c(on)j(open_in\).)396 +944 y(Clones:)h(If)f(the)g('clone')f(method)g(is)i(in)m(v)n(ok)o(ed)d +(before)h('open_in',)e(all)k(contained)e(resolv)o(ers)g(are)h(cloned)f +(separately)396 1052 y(and)h(again)f(combined.)f(If)i(the)g('clone')f +(method)g(is)i(in)m(v)n(ok)o(ed)e(after)g('open_in')f(\(i.e.)i(while)g +(the)g(resolv)o(er)f(is)i(open\),)396 1160 y(additionally)e(the)h +(clone)f(of)h(the)h(acti)n(v)o(e)e(resolv)o(er)g(is)i(\003agged)f(as)g +(being)g(preferred,)d(i.e.)k(it)f(is)i(tried)e(\002rst.)-2 +1662 y Fx(4.3.)39 b(The)g(DTD)g(c)m(lasses)396 1841 y +Fr(Sorry)-5 b(,)21 b(not)f(yet)g(written.)h(P)-7 b(erhaps)20 +b(the)g(interface)g(de\002nition)e(of)j(Pxp_dtd)d(e)n(xpr)m(esses)j +(the)f(same:)396 2063 y Fq(\(****************************************)o +(******)o(******)o(******)o(******)o(*****)o(*\))396 +2160 y(\(*)3048 b(*\))396 2257 y(\(*)45 b(Pxp_dtd:)2643 +b(*\))396 2354 y(\(*)224 b(Object)44 b(model)g(of)g(document)g(type)g +(declarations)939 b(*\))396 2452 y(\(*)3048 b(*\))396 +2549 y(\(****************************************)o(******)o(******)o +(******)o(******)o(*****)o(*\))396 2743 y(\(*)45 b +(======================================)o(======)o(======)o(======)o +(======)o(=====)o(===)441 2840 y(*)g(OVERVIEW)441 2937 +y(*)441 3034 
y(*)g(class)f(dtd)g(...............)e(represents)i(the)g +(whole)g(DTD,)g(including)f(element)441 3132 y(*)1210 +b(declarations,)43 b(entity)h(declarations,)f(notation)441 +3229 y(*)1210 b(declarations,)43 b(and)h(processing)g(instructions)441 +3326 y(*)h(class)f(dtd_element)f(.......)g(represents)h(an)g(element)g +(declaration)f(consisting)441 3423 y(*)1210 b(of)45 b(a)g(content)e +(model)h(and)h(an)f(attribute)f(list)441 3520 y(*)1210 +b(declaration)441 3617 y(*)45 b(class)f(dtd_notation)f(......)g +(represents)h(a)g(notation)g(declaration)441 3714 y(*)h(class)f +(proc_instruction)e(..)i(represents)g(a)g(processing)f(instruction)441 +3811 y(*)i(======================================)o(======)o(======)o +(======)o(======)o(=====)o(===)441 3909 y(*)441 4006 +y(*\))396 4297 y(class)f(dtd)h(:)486 4394 y(\(*)f(Creation:)531 +4491 y(*)134 b(new)44 b(dtd)531 4589 y(*)g(creates)g(a)h(new,)f(empty)g +(DTD)g(object)g(without)g(any)g(declaration,)f(without)g(a)i(root)531 +4686 y(*)f(element,)g(without)g(an)g(ID.)531 4783 y(*\))p +Black 3800 5278 a Fr(81)p Black eop +%%Page: 82 82 +82 81 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 486 579 a Fq +(Pxp_types.collect_warnings)40 b(-)p Fo(>)486 676 y Fq +(Pxp_types.rep_encoding)h(-)p Fo(>)486 773 y Fq(object)576 +870 y(method)i(root)i(:)f(string)g(option)665 967 y(\(*)h(get)f(the)g +(name)h(of)f(the)g(root)h(element)e(if)i(present)e(*\))576 +1162 y(method)g(set_root)h(:)h(string)e(-)p Fo(>)i Fq(unit)665 +1259 y(\(*)g(set)f(the)g(name)h(of)f(the)g(root)h(element.)e(This)h +(method)g(can)g(be)h(invoked)710 1356 y(*)g(only)f(once)710 +1453 y(*\))576 1647 y(method)f(id)i(:)g(Pxp_types.dtd_id)d(option)665 +1745 y(\(*)j(get)f(the)g(identifier)g(for)g(this)g(DTD)g(*\))576 +1939 y(method)f(set_id)h(:)h(Pxp_types.dtd_id)d(-)p Fo(>)i +Fq(unit)665 2036 y(\(*)h(set)f(the)g(identifier.)f(This)i(method)e(can) +i(be)f(invoked)g(only)g(once)g(*\))576 2230 y(method)f(encoding)h(:)h 
+(Pxp_types.rep_encoding)665 2327 y(\(*)g(returns)e(the)i(encoding)e +(used)h(for)h(character)e(representation)g(*\))576 2619 +y(method)g(allow_arbitrary)g(:)h(unit)665 2716 y(\(*)h(After)f(this)g +(method)g(has)g(been)g(invoked,)g(the)g(ob-)396 2813 +y(ject)g(changes)g(its)g(behaviour:)710 2910 y(*)h(-)f(elements)g(and)g +(notations)g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h +(an)710 3007 y(*)134 b(arbitrary)44 b(way;)g(the)g(methods)g("element") +f(and)i("notation")e(indicate)g(this)710 3104 y(*)134 +b(by)45 b(raising)f(Undeclared)f(instead)g(of)i(Validation_error.)710 +3202 y(*\))576 3396 y(method)e(disallow_arbitrary)f(:)j(unit)576 +3590 y(method)e(arbitrary_allowed)f(:)j(bool)665 3687 +y(\(*)g(Returns)e(whether)h(arbitrary)f(contents)h(are)g(allowed)g(or)g +(not.)h(*\))576 3882 y(method)e(standalone_declaration)f(:)i(bool)665 +3979 y(\(*)h(Whether)e(there)h(is)h(a)g('standalone')d(declaration)h +(or)i(not.)f(Strictly)710 4076 y(*)h(speaking,)e(this)h(declaration)f +(is)i(not)f(part)g(of)h(the)f(DTD,)g(but)h(it)f(is)710 +4173 y(*)h(included)e(here)h(because)g(of)h(practical)e(reasons.)710 +4270 y(*)i(If)f(not)h(set,)f(this)g(property)f(defaults)h(to)g +('false'.)710 4367 y(*\))576 4561 y(method)f +(set_standalone_declaration)e(:)k(bool)f(-)p Fo(>)g Fq(unit)665 +4659 y(\(*)h(Sets)f(the)g('standalone')f(declaration.)g(*\))p +Black 3800 5278 a Fr(82)p Black eop +%%Page: 83 83 +83 82 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43 +b(add_element)g(:)i(dtd_element)e(-)p Fo(>)h Fq(unit)665 +676 y(\(*)h(add)f(the)g(given)g(element)g(declaration)f(to)i(this)f +(DTD.)g(Raises)g(Not_found)710 773 y(*)h(if)f(there)g(is)h(already)e +(an)i(element)f(declaration)f(with)h(the)g(same)g(name.)710 +870 y(*\))576 1065 y(method)f(add_gen_entity)g(:)i(Pxp_entity.entity)d +(-)p Fo(>)i Fq(bool)g(-)p Fo(>)g Fq(unit)665 1162 y(\(*)h +(add_gen_entity)d(e)j(extdecl:)710 1259 
y(*)g(add)f(the)g(entity)g('e') +h(as)f(general)g(entity)g(to)g(this)g(DTD)h(\(general)e(entities)710 +1356 y(*)i(are)f(those)g(represented)f(by)i(&name;\).)e(If)i(there)f +(is)g(already)g(a)g(declaration)710 1453 y(*)h(with)f(the)g(same)g +(name,)g(the)h(second)f(definition)f(is)h(ignored;)g(as)g(excep-)396 +1550 y(tion)g(from)710 1647 y(*)h(this)f(rule,)g(entities)f(with)i +(names)f("lt",)g("gt",)g("amp",)f("quot",)h(and)g("apos")710 +1745 y(*)h(may)f(only)g(be)h(redeclared)e(with)h(a)h(definition)e(that) +h(is)h(equivalent)e(to)h(the)710 1842 y(*)h(standard)e(definition;)g +(otherwise)h(a)g(Validation_error)e(is)j(raised.)710 +1939 y(*)710 2036 y(*)g('extdecl':)e('true')h(indicates)f(that)h(the)h +(entity)e(declaration)g(occurs)h(in)710 2133 y(*)h(an)f(external)g +(entity.)f(\(Used)h(for)h(the)f(standalone)f(check.\))710 +2230 y(*\))576 2424 y(method)g(add_par_entity)g(:)i(Pxp_entity.entity)d +(-)p Fo(>)i Fq(unit)665 2522 y(\(*)h(add)f(the)g(given)g(entity)g(as)h +(parameter)e(entity)h(to)g(this)h(DTD)f(\(parameter)710 +2619 y(*)h(entities)e(are)i(those)f(represented)f(by)h(\045name;\).)g +(If)g(there)g(is)h(already)e(a)710 2716 y(*)i(declaration)e(with)h(the) +g(same)g(name,)g(the)h(second)f(definition)f(is)h(ignored.)710 +2813 y(*\))576 3007 y(method)f(add_notation)g(:)i(dtd_notation)e(-)p +Fo(>)h Fq(unit)665 3104 y(\(*)h(add)f(the)g(given)g(notation)g(to)g +(this)h(DTD.)f(If)g(there)g(is)h(al-)396 3202 y(ready)f(a)h +(declaration)710 3299 y(*)g(with)f(the)g(same)g(name,)g(a)h +(Validation_error)d(is)j(raised.)710 3396 y(*\))576 3590 +y(method)e(add_pinstr)h(:)g(proc_instruction)e(-)p Fo(>)j +Fq(unit)665 3687 y(\(*)g(add)f(the)g(given)g(processing)g(instruction)f +(to)h(this)g(DTD.)g(*\))576 3882 y(method)f(element)h(:)h(string)f(-)p +Fo(>)g Fq(dtd_element)665 3979 y(\(*)h(looks)f(up)g(the)h(element)e +(declaration)g(with)h(the)h(given)f(name.)g(Raises)710 +4076 y(*)h(Validation_error)d(if)i(the)h(element)e(can-)396 +4173 
y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710 +4270 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised) +f(instead.\))710 4367 y(*\))576 4561 y(method)f(element_names)g(:)i +(string)f(list)665 4659 y(\(*)h(returns)e(the)i(list)f(of)g(the)h +(names)f(of)g(all)h(element)e(declarations.)g(*\))576 +4853 y(method)g(gen_entity)h(:)g(string)g(-)p Fo(>)g +Fq(\(Pxp_entity.entity)e(*)j(bool\))p Black 3800 5278 +a Fr(83)p Black eop +%%Page: 84 84 +84 83 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45 +b(let)f(e,)h(extdecl)e(=)i(obj)f(#)h(gen_entity)e(n:)710 +676 y(*)i(looks)f(up)g(the)h(general)e(entity)h('e')g(with)h(the)f +(name)g('n'.)g(Raises)710 773 y(*)h(WF_error)e(if)i(the)f(entity)g +(cannot)g(be)g(found.)710 870 y(*)h('extdecl':)e(indicates)g(whether)h +(the)g(entity)g(declaration)f(occured)h(in)g(an)710 967 +y(*)h(external)e(entity.)710 1065 y(*\))576 1259 y(method)g +(gen_entity_names)g(:)h(string)g(list)665 1356 y(\(*)h(returns)e(the)i +(list)f(of)g(all)h(general)e(entity)h(names)g(*\))576 +1550 y(method)f(par_entity)h(:)g(string)g(-)p Fo(>)g +Fq(Pxp_entity.entity)665 1647 y(\(*)h(looks)f(up)g(the)h(parameter)e +(entity)h(with)g(the)g(given)g(name.)g(Raises)710 1745 +y(*)h(WF_error)e(if)i(the)f(entity)g(cannot)g(be)g(found.)710 +1842 y(*\))576 2036 y(method)f(par_entity_names)g(:)h(string)g(list)665 +2133 y(\(*)h(returns)e(the)i(list)f(of)g(all)h(parameter)e(entity)h +(names)g(*\))576 2327 y(method)f(notation)h(:)h(string)e(-)p +Fo(>)i Fq(dtd_notation)665 2424 y(\(*)g(looks)f(up)g(the)h(notation)e +(declaration)g(with)h(the)h(given)f(name.)g(Raises)710 +2522 y(*)h(Validation_error)d(if)i(the)h(notation)e(can-)396 +2619 y(not)i(be)f(found.)g(\(If)g("allow_arbitrary")710 +2716 y(*)h(has)f(been)g(invoked)g(before,)g(Unrestricted)e(is)j(raised) +f(instead.\))710 2813 y(*\))576 3007 y(method)f(notation_names)g(:)i +(string)e(list)665 3104 y(\(*)i(Returns)e(the)i(list)f(of)g(the)h 
+(names)f(of)g(all)h(added)f(notations)f(*\))576 3299 +y(method)g(pinstr)h(:)h(string)f(-)p Fo(>)g Fq(proc_instruction)e(list) +665 3396 y(\(*)j(looks)f(up)g(all)h(processing)e(instructions)g(with)h +(the)g(given)g(target.)710 3493 y(*)h(The)f("target")g(is)g(the)g +(identifier)g(following)f(")p Fo(<)p Fq(?".)710 3590 +y(*)i(Note:)f(It)g(is)h(not)f(possible)g(to)g(find)g(out)h(the)f(exact) +g(position)f(of)i(the)710 3687 y(*)g(processing)e(instruction.)710 +3784 y(*\))576 3979 y(method)g(pinstr_names)g(:)i(string)f(list)665 +4076 y(\(*)h(Returns)e(the)i(list)f(of)g(the)h(names)f(\(targets\))f +(of)i(all)f(added)g(pinstrs)f(*\))576 4270 y(method)g(validate)h(:)h +(unit)665 4367 y(\(*)g(ensures)e(that)i(the)f(DTD)g(is)h(valid.)f(This) +g(method)g(is)g(optimized)f(such)h(that)710 4464 y(*)h(actual)f +(validation)f(is)h(only)g(performed)g(if)g(DTD)h(has)f(changed.)710 +4561 y(*)h(If)f(the)h(DTD)f(is)g(invalid,)g(mostly)g(a)g +(Validation_error)f(is)h(raised,)710 4659 y(*)h(but)f(other)g +(exceptions)f(are)i(possible,)e(too.)710 4756 y(*\))p +Black 3800 5278 a Fr(84)p Black eop +%%Page: 85 85 +85 84 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43 +b(only_deterministic_models)e(:)k(unit)665 676 y(\(*)g(Succeeds)e(if)i +(all)f(regexp)g(content)g(models)f(are)i(deterministic.)710 +773 y(*)g(Otherwise)e(Validation_error.)710 870 y(*\))576 +1065 y(method)g(write)h(:)h(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(bool)f(-)396 +1162 y Fo(>)h Fq(unit)665 1259 y(\(*)g(write_compact_as_latin1)c(os)j +(enc)h(doctype:)710 1356 y(*)g(Writes)f(the)g(DTD)g(as)h('enc'-encoded) +d(string)i(to)h('os'.)f(If)g('doctype',)f(a)710 1453 +y(*)i(DTD)f(like)g Fo(<)p Fq(!DOCTYPE)f(root)i([)f(...)h(])p +Fo(>)f Fq(is)g(written.)g(If)g('not)h(doctype',)710 1550 +y(*)g(only)f(the)g(declarations)f(are)h(written)g(\(the)g(material)g +(within)g(the)710 1647 y(*)h(square)f(brackets\).)710 +1745 
y(*\))576 1939 y(method)f(write_compact_as_latin1)e(:)k +(Pxp_types.output_stream)c(-)p Fo(>)j Fq(bool)h(-)p Fo(>)f +Fq(unit)665 2036 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only)h(to)h +(keep)f(compatibility)f(with)710 2133 y(*)i(older)f(versions)f(of)i +(the)f(parser)710 2230 y(*\))576 2522 y +(\(*---------------------------*\))576 2619 y(method)f(invalidate)h(:)g +(unit)665 2716 y(\(*)h(INTERNAL)e(METHOD)h(*\))576 2813 +y(method)f(warner)h(:)h(Pxp_types.collect_warnings)665 +2910 y(\(*)g(INTERNAL)e(METHOD)h(*\))486 3007 y(end)396 +3396 y(\(*)h(--------------------------------------)o(------)o(---)39 +b(*\))396 3590 y(and)45 b(dtd_element)e(:)h(dtd)h(-)p +Fo(>)f Fq(string)g(-)p Fo(>)486 3687 y Fq(\(*)g(Creation:)531 +3784 y(*)134 b(new)44 b(dtd_element)f(init_dtd)h(init_name:)531 +3882 y(*)g(creates)g(a)h(new)f(dtd_element)f(object)h(for)g(init_dtd)g +(with)g(init_name.)531 3979 y(*)g(The)h(strings)e(are)i(represented)e +(in)h(the)h(same)f(encoding)f(as)i(init_dtd.)531 4076 +y(*\))486 4173 y(object)576 4367 y(method)e(name)i(:)f(string)665 +4464 y(\(*)h(returns)e(the)i(name)f(of)g(the)h(declared)e(element)h +(*\))576 4659 y(method)f(externally_declared)f(:)j(bool)665 +4756 y(\(*)g(returns)e(whether)h(the)g(element)g(declaration)f(occurs)h +(in)g(an)h(external)710 4853 y(*)g(entity.)p Black 3800 +5278 a Fr(85)p Black eop +%%Page: 86 86 +86 85 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 710 579 a Fq(*\))576 +773 y(method)43 b(content_model)g(:)i(Pxp_types.content_model_type)665 +870 y(\(*)g(get)f(the)g(content)g(model)g(of)h(this)f(element)f +(declaration,)g(or)i(Unspecified)e(*\))576 1065 y(method)g(content_dfa) +g(:)i(Pxp_dfa.dfa_definition)c(option)665 1162 y(\(*)k(return)f(the)g +(DFA)g(of)h(the)f(content)g(model)g(if)g(there)g(is)h(a)f(DFA,)h(or)f +(None.)710 1259 y(*)h(A)f(DFA)h(exists)f(only)g(for)g(regexp)g(style)g +(content)g(models)f(which)h(are)710 1356 y(*)h(deterministic.)710 +1453 y(*\))576 1647 
y(method)e(set_cm_and_extdecl)f(:)j +(Pxp_types.content_model_type)40 b(-)p Fo(>)k Fq(bool)h(-)p +Fo(>)f Fq(unit)665 1745 y(\(*)h(set_cm_and_extdecl)d(cm)i(extdecl:)710 +1842 y(*)h(set)f(the)g(content)g(model)g(to)h('cm'.)f(Once)g(the)g +(content)g(model)g(is)g(not)710 1939 y(*)h(Unspecified,)e(it)h(cannot)g +(be)g(set)h(to)f(a)h(different)e(value)h(again.)710 2036 +y(*)h(Furthermore,)e(it)h(is)h(set)f(whether)g(the)g(element)g(occurs)f +(in)i(an)f(external)710 2133 y(*)h(entity)f(\('extdecl'\).)710 +2230 y(*\))576 2424 y(method)f(encoding)h(:)h(Pxp_types.rep_encoding) +665 2522 y(\(*)g(Return)f(the)g(encoding)f(of)i(the)f(strings)g(*\))576 +2716 y(method)f(allow_arbitrary)g(:)h(unit)665 2813 y(\(*)h(After)f +(this)g(method)g(has)g(been)g(invoked,)g(the)g(ob-)396 +2910 y(ject)g(changes)g(its)g(behaviour:)710 3007 y(*)h(-)f(attributes) +g(that)g(have)g(not)g(been)g(added)g(may)h(be)f(used)g(in)h(an)710 +3104 y(*)134 b(arbitrary)44 b(way;)g(the)g(method)g("attribute")f +(indicates)g(this)710 3202 y(*)134 b(by)45 b(raising)f(Undeclared)f +(instead)g(of)i(Validation_error.)710 3299 y(*\))576 +3493 y(method)e(disallow_arbitrary)f(:)j(unit)576 3687 +y(method)e(arbitrary_allowed)f(:)j(bool)665 3784 y(\(*)g(Returns)e +(whether)h(arbitrary)f(attributes)h(are)g(allowed)g(or)g(not.)g(*\))576 +3979 y(method)f(attribute)h(:)g(string)g(-)p Fo(>)1517 +4076 y Fq(Pxp_types.att_type)e(*)j(Pxp_types.att_default)665 +4173 y(\(*)g(get)f(the)g(type)h(and)f(default)g(value)g(of)g(a)h +(declared)e(attribute,)g(or)i(raise)710 4270 y(*)g(Validation_error)d +(if)i(the)h(attribute)e(does)h(not)h(exist.)710 4367 +y(*)g(If)f('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g +(raised)g(instead)710 4464 y(*)h(of)f(Validation_error.)710 +4561 y(*\))576 4756 y(method)f +(attribute_violates_standalone_declaration)38 b(:)1069 +4853 y(string)44 b(-)p Fo(>)g Fq(string)g(option)g(-)p +Fo(>)g Fq(bool)p Black 3798 5278 a Fr(86)p Black eop +%%Page: 87 87 +87 86 bop Black 2348 67 a Fr(Chapter)20 
b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 665 579 a Fq(\(*)45 +b(attribute_violates_standalone_declarat)o(ion)39 b(name)44 +b(v:)710 676 y(*)h(Checks)f(whether)f(the)i(attribute)e('name')h +(violates)f(the)i("standalone")710 773 y(*)g(declaration)e(if)h(it)h +(has)f(value)g('v'.)710 870 y(*)h(The)f(method)g(returns)g(true)g(if:) +710 967 y(*)h(-)f(The)h(attribute)e(declaration)g(occurs)h(in)g(an)h +(external)e(entity,)710 1065 y(*)i(and)f(if)h(one)f(of)g(the)h(two)f +(conditions)f(holds:)710 1162 y(*)i(-)f(v)h(=)g(None,)f(and)g(there)g +(is)h(a)f(default)g(for)g(the)h(attribute)e(value)710 +1259 y(*)i(-)f(v)h(=)g(Some)f(s,)g(and)h(the)f(type)g(of)h(the)f +(attribute)f(is)i(not)f(CDATA,)710 1356 y(*)134 b(and)45 +b(s)f(changes)g(if)h(normalized)e(according)g(to)i(the)f(rules)g(of)g +(the)710 1453 y(*)134 b(attribute)44 b(type.)710 1550 +y(*)710 1647 y(*)h(The)f(method)g(raises)g(Validation_error)e(if)i(the) +h(attribute)e(does)h(not)g(exist.)710 1745 y(*)h(If)f +('arbitrary_allowed',)e(the)i(exception)f(Undeclared)h(is)g(raised)g +(instead)710 1842 y(*)h(of)f(Validation_error.)710 1939 +y(*\))576 2133 y(method)f(attribute_names)g(:)h(string)g(list)665 +2230 y(\(*)h(get)f(the)g(list)h(of)f(all)g(declared)g(attributes)f(*\)) +576 2424 y(method)g(names_of_required_attributes)e(:)j(string)g(list) +665 2522 y(\(*)h(get)f(the)g(list)h(of)f(all)g(attributes)g(that)g(are) +g(specified)f(as)i(required)710 2619 y(*)g(attributes)710 +2716 y(*\))576 2910 y(method)e(id_attribute_name)f(:)j(string)f(option) +665 3007 y(\(*)h(Returns)e(the)i(name)f(of)g(the)h(attribute)e(with)h +(type)g(ID,)h(or)f(None.)g(*\))576 3202 y(method)f +(idref_attribute_names)f(:)i(string)g(list)665 3299 y(\(*)h(Returns)e +(the)i(names)f(of)g(the)h(attributes)e(with)h(type)g(IDREF)g(or)h +(IDREFS.)e(*\))576 3493 y(method)g(add_attribute)g(:)i(string)f(-)p +Fo(>)1607 3590 y Fq(Pxp_types.att_type)e(-)p Fo(>)531 +3687 y Fq(Pxp_types.att_default)f(-)p Fo(>)531 3784 y +Fq(bool)j(-)p 
Fo(>)620 3882 y Fq(unit)665 3979 y(\(*)h(add_attribute)d +(name)j(type)f(default)f(extdecl:)710 4076 y(*)i(add)f(an)h(attribute)e +(declaration)g(for)h(an)h(attribute)e(with)h(the)h(given)e(name,)710 +4173 y(*)i(type,)f(and)g(default)g(value.)g(If)g(there)g(is)h(more)f +(than)g(one)g(declaration)f(for)710 4270 y(*)i(an)f(attribute)g(name,)g +(the)g(first)g(declara-)396 4367 y(tion)g(counts;)g(the)g(other)g +(declarations)710 4464 y(*)h(are)f(ignored.)710 4561 +y(*)h('extdecl':)e(if)h(true,)g(the)h(attribute)e(declaration)g(occurs) +h(in)g(an)h(external)710 4659 y(*)g(entity.)e(This)i(property)e(is)i +(used)f(to)g(check)g(the)h("standalone")d(attribute.)710 +4756 y(*\))p Black 3797 5278 a Fr(87)p Black eop +%%Page: 88 88 +88 87 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 576 579 a Fq(method)43 +b(validate)h(:)h(unit)665 676 y(\(*)g(checks)f(whether)f(this)h +(element)g(declaration)f(\(i.e.)h(the)g(content)g(model)g(and)710 +773 y(*)h(all)f(attribute)f(declarations\))g(is)i(valid)f(for)g(the)g +(associated)f(DTD.)710 870 y(*)i(Raises)f(mostly)f(Validation_error)g +(if)h(the)g(validation)g(fails.)710 967 y(*\))576 1162 +y(method)f(write)h(:)h(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665 +1259 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710 +1356 y(*)h(Writes)f(the)g Fo(<)p Fq(!ELEMENT)f(...)h +Fo(>)h Fq(declaration)e(to)h('os')h(as)f('enc'-)396 1453 +y(encoded)g(string.)710 1550 y(*\))576 1745 y(method)f +(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(unit)665 1842 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only) +h(to)h(keep)f(compatibility)f(with)710 1939 y(*)i(older)f(versions)f +(of)i(the)f(parser)710 2036 y(*\))486 2133 y(end)396 +2327 y(\(*)h(--------------------------------------)o(------)o(---)39 +b(*\))396 2522 y(and)45 b(dtd_notation)d(:)j(string)f(-)p +Fo(>)g Fq(Pxp_types.ext_id)e(-)p Fo(>)j Fq(Pxp_types.rep_encoding)c(-)p +Fo(>)486 2619 y 
Fq(\(*)j(Creation:)531 2716 y(*)179 b(new)44 +b(dtd_notation)f(a_name)h(an_external_ID)e(init_encoding)531 +2813 y(*)i(creates)g(a)h(new)f(dtd_notation)f(object)h(with)g(the)g +(given)g(name)g(and)h(the)f(given)531 2910 y(*)g(external)g(ID.)531 +3007 y(*\))486 3104 y(object)576 3202 y(method)f(name)i(:)f(string)576 +3299 y(method)f(ext_id)h(:)h(Pxp_types.ext_id)576 3396 +y(method)e(encoding)h(:)h(Pxp_types.rep_encoding)576 +3590 y(method)e(write)h(:)h(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(Pxp_types.encoding)e(-)p Fo(>)j Fq(unit)665 +3687 y(\(*)g(write_compact_as_latin1)c(os)j(enc:)710 +3784 y(*)h(Writes)f(the)g Fo(<)p Fq(!NOTATION)f(...)h +Fo(>)h Fq(declaration)e(to)h('os')g(as)h('enc'-encoded)710 +3882 y(*)g(string.)710 3979 y(*\))576 4173 y(method)e +(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(unit)665 4270 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only) +h(to)h(keep)f(compatibility)f(with)710 4367 y(*)i(older)f(versions)f +(of)i(the)f(parser)710 4464 y(*\))486 4659 y(end)396 +4853 y(\(*)h(--------------------------------------)o(------)o(---)39 +b(*\))p Black 3800 5278 a Fr(88)p Black eop +%%Page: 89 89 +89 88 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 676 a Fq(and)45 +b(proc_instruction)d(:)i(string)g(-)p Fo(>)h Fq(string)e(-)p +Fo(>)i Fq(Pxp_types.rep_encoding)c(-)p Fo(>)486 773 y +Fq(\(*)j(Creation:)531 870 y(*)134 b(new)44 b(proc_instruction)f +(a_target)g(a_value)531 967 y(*)h(creates)g(a)h(new)f(proc_instruction) +e(object)i(with)g(the)h(given)f(target)f(string)h(and)531 +1065 y(*)g(the)h(given)f(value)g(string.)531 1162 y(*)g(Note:)g(A)h +(processing)e(instruction)g(is)i(written)e(as)i Fo(<)p +Fq(?target)e(value?)p Fo(>)p Fq(.)531 1259 y(*\))486 +1356 y(object)576 1453 y(method)g(target)h(:)h(string)576 +1550 y(method)e(value)h(:)h(string)576 1647 y(method)e(encoding)h(:)h +(Pxp_types.rep_encoding)576 1842 y(method)e(write)h(:)h +(Pxp_types.output_stream)c(-)p 
Fo(>)j Fq(Pxp_types.encoding)e(-)p +Fo(>)j Fq(unit)665 1939 y(\(*)g(write)f(os)g(enc:)710 +2036 y(*)h(Writes)f(the)g Fo(<)p Fq(?...?)p Fo(>)f Fq(PI)i(to)f('os')h +(as)f('enc'-encoded)f(string.)710 2133 y(*\))576 2327 +y(method)g(write_compact_as_latin1)e(:)k(Pxp_types.output_stream)c(-)p +Fo(>)j Fq(unit)665 2424 y(\(*)h(DEPRECATED)e(METHOD;)h(included)f(only) +h(to)h(keep)f(compatibility)f(with)710 2522 y(*)i(older)f(versions)f +(of)i(the)f(parser)710 2619 y(*\))576 2813 y(method)f(parse_pxp_option) +g(:)h(\(string)g(*)h(string)e(*)i(\(string)f(*)g(string\))g(list\))665 +2910 y(\(*)h(Parses)f(a)g(PI)h(containing)e(a)i(PXP)f(option.)g(Such)g +(PIs)g(are)g(formed)g(like:)710 3007 y(*)134 b Fo(<)p +Fq(?target)44 b(option-name)f(option-att="value")f(option-att="value")f +(...)k(?)p Fo(>)710 3104 y Fq(*)g(The)f(method)g(returns)g(a)g(triple) +710 3202 y(*)134 b(\(target,)44 b(option-name,)f([option-att,)g(value;) +g(...]\))710 3299 y(*)i(or)f(raises)g(Error.)710 3396 +y(*\))486 3590 y(end)396 3784 y(;;)-2 4286 y Fx(4.4.)39 +b(In)-6 b(v)l(oking)38 b(the)h(par)n(ser)396 4466 y Fv(Here)20 +b(a)h(description)e(of)h(Pxp_yacc.)-2 4794 y Fp(4.4.1.)35 +b(Defaults)p Black 3800 5278 a Fr(89)p Black eop +%%Page: 90 90 +90 89 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(The)g(follo)n(wing)f +(def)o(aults)g(are)i(a)n(v)n(ailable:)396 759 y Fq(val)45 +b(default_config)d(:)j(config)396 856 y(val)g(default_extension)d(:)i +(\('a)h(node)f(extension\))f(as)h('a)396 953 y(val)h(default_spec)d(:)j +(\('a)f(node)h(extension)e(as)h('a\))h(spec)-2 1406 y +Fp(4.4.2.)35 b(P)l(ar)n(sing)f(functions)396 1574 y Fv(In)20 +b(the)g(follo)n(wing,)f(the)h(term)g("closed)g(document")e(refers)h(to) +i(an)f(XML)g(structure)f(lik)o(e)396 1754 y Fo(<)p Fq(!DOCTYPE)43 +b(...)i([)f Fn(declarations)f Fq(])i Fo(>)396 1851 y(<)p +Fn(root)p Fo(>)396 1948 y Fq(...)396 2045 y Fo(<)p Fq(/)p +Fn(root)p Fo(>)396 2236 y Fv(The)20 
b(term)g("fragment")e(refers)i(to)g +(an)g(XML)h(structure)e(lik)o(e)396 2416 y Fo(<)p Fn(root)p +Fo(>)396 2513 y Fq(...)396 2611 y Fo(<)p Fq(/)p Fn(root)p +Fo(>)396 2802 y Fv(i.e.)h(only)g(to)g(one)g(isolated)g(element)f +(instance.)396 3023 y Fq(val)45 b(parse_dtd_entity)d(:)i(config)g(->)h +(source)f(->)g(dtd)396 3214 y Fv(P)o(arses)21 b(the)f(declarations)f +(which)h(are)g(contained)e(in)j(the)f(entity)-5 b(,)19 +b(and)h(returns)f(them)h(as)h Fq(dtd)f Fv(object.)396 +3436 y Fq(val)45 b(extract_dtd_from_document_entity)39 +b(:)45 b(config)f(->)g(source)g(->)g(dtd)396 3627 y Fv(Extracts)20 +b(the)g(DTD)h(from)e(a)h(closed)g(document.)e(Both)i(the)h(internal)e +(and)h(the)g(e)o(xternal)f(subsets)h(are)h(e)o(xtracted)d(and)396 +3735 y(combined)g(to)i(one)f Fq(dtd)h Fv(object.)f(This)h(function)e +(does)h(not)h(parse)f(the)h(whole)f(document,)f(b)n(ut)i(only)e(the)i +(parts)g(that)g(are)396 3843 y(necessary)g(to)g(e)o(xtract)f(the)i +(DTD.)396 4064 y Fq(val)45 b(parse_document_entity)c(:)576 +4161 y(?transform_dtd:\(dtd)g(->)k(dtd\))f(->)576 4259 +y(?id_index:\('ext)e(index\))i(->)576 4356 y(config)f(->)576 +4453 y(source)g(->)576 4550 y('ext)h(spec)g(->)755 4647 +y('ext)g(document)p Black 3800 5278 a Fr(90)p Black eop +%%Page: 91 91 +91 90 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(P)o(arses)h(a)g(closed) +e(document)g(and)g(v)n(alidates)h(it)h(against)e(the)i(DTD)f(that)g(is) +h(contained)e(in)h(the)h(document)d(\(internal)396 687 +y(and)i(e)o(xternal)f(subsets\).)h(The)g(option)f Fq(~transform_dtd)f +Fv(can)i(be)g(used)g(to)g(transform)f(the)h(DTD)h(in)f(the)g(document,) +396 795 y(and)g(to)g(use)h(the)f(transformed)e(DTD)i(for)g(v)n +(alidation.)e(If)i Fq(~id_index)g Fv(is)h(speci\002ed,)e(an)h(inde)o(x) +f(of)h(all)h(ID)f(attrib)n(utes)h(is)396 903 y(created.)396 +1124 y Fq(val)45 b(parse_wfdocument_entity)c(:)576 1222 +y(config)i(->)576 1319 y(source)g(->)576 1416 y('ext)h(spec)g(->)755 +1513 
y('ext)g(document)396 1704 y Fv(P)o(arses)21 b(a)g(closed)e +(document,)f(b)n(ut)j(checks)e(it)i(only)e(on)h(well-formedness.)396 +1926 y Fq(val)45 b(parse_content_entity)86 b(:)576 2023 +y(?id_index:\('ext)42 b(index\))i(->)576 2120 y(config)f(->)576 +2217 y(source)g(->)576 2314 y(dtd)h(->)576 2411 y('ext)g(spec)g(->)755 +2508 y('ext)g(node)396 2699 y Fv(P)o(arses)21 b(a)g(fragment,)d(and)h +(v)n(alidates)h(the)g(element.)396 2921 y Fq(val)45 b +(parse_wfcontent_entity)c(:)576 3018 y(config)i(->)576 +3115 y(source)g(->)576 3212 y('ext)h(spec)g(->)755 3310 +y('ext)g(node)396 3500 y Fv(P)o(arses)21 b(a)g(fragment,)d(b)n(ut)i +(checks)g(it)g(only)g(on)g(well-formedness.)-2 3870 y +Fp(4.4.3.)35 b(Con\002guration)f(options)396 4110 y Fq(type)44 +b(config)g(=)576 4207 y({)g(warner)g(:)h(collect_warnings;)665 +4304 y(errors_with_line_numbers)c(:)k(bool;)665 4401 +y(enable_pinstr_nodes)d(:)j(bool;)665 4499 y(enable_super_root_node)c +(:)k(bool;)665 4596 y(enable_comment_nodes)d(:)i(bool;)665 +4693 y(encoding)g(:)g(rep_encoding;)665 4790 y +(recognize_standalone_declaration)c(:)k(bool;)p Black +3800 5278 a Fr(91)p Black eop +%%Page: 92 92 +92 91 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 665 579 a Fq +(store_element_positions)41 b(:)k(bool;)665 676 y(idref_pass)e(:)i +(bool;)665 773 y(validate_by_dfa)e(:)h(bool;)665 870 +y(accept_only_deterministic_models)c(:)k(bool;)665 967 +y(...)576 1065 y(})p Black 396 1422 a Ft(\225)p Black +60 w Fq(warner:)p Fv(The)19 b(parser)h(prints)f(w)o(arnings)h(by)f(in)m +(v)n(oking)f(the)j(method)d Fq(warn)j Fv(for)e(this)i(w)o(arner)e +(object.)h(\(Def)o(ault:)f(all)479 1530 y(w)o(arnings)h(are)g +(dropped\))p Black 396 1637 a Ft(\225)p Black 60 w Fq +(errors_with_line_numbers:)p Fv(If)c(true,)k(errors)f(contain)g(line)i +(numbers;)d(if)j(f)o(alse,)f(errors)g(contain)f(only)g(byte)479 +1745 y(positions.)h(The)g(latter)g(mode)f(is)i(f)o(aster)-5 +b(.)21 b(\(Def)o(ault:)e(true\))p Black 
396 1853 a Ft(\225)p +Black 60 w Fq(enable_pinstr_nodes:)p Fv(If)e(true,)j(the)g(parser)f +(creates)i(e)o(xtra)e(nodes)g(for)h(processing)f(instructions.)g(If)h +(f)o(alse,)479 1961 y(processing)f(instructions)g(are)h(simply)g(added) +f(to)i(the)f(element)f(or)h(document)f(surrounding)e(the)j +(instructions.)479 2069 y(\(Def)o(ault:)g(f)o(alse\))p +Black 396 2177 a Ft(\225)p Black 60 w Fq(enable_super_root_node:)p +Fv(If)c(true,)k(the)g(parser)g(creates)g(an)g(e)o(xtra)g(node)f(which)g +(is)j(the)e(parent)f(of)h(the)g(root)479 2285 y(of)g(the)g(document)f +(tree.)h(This)g(node)f(is)i(called)f(super)g(root;)f(it)i(is)g(an)g +(element)e(with)i(type)e Fq(T_super_root)p Fv(.)g(-)h(If)479 +2393 y(there)g(are)g(processing)f(instructions)g(outside)h(the)g(root)f +(element)h(and)g(outside)f(the)i(DTD,)f(the)o(y)f(are)h(added)f(to)i +(the)479 2501 y(super)f(root)f(instead)h(of)g(the)g(document.)e(-)j(If) +f(f)o(alse,)g(the)g(super)g(root)g(node)f(is)i(not)f(created.)f(\(Def)o +(ault:)h(f)o(alse\))p Black 396 2609 a Ft(\225)p Black +60 w Fq(enable_comment_nodes:)p Fv(If)d(true,)i(the)i(parser)e(creates) +h(nodes)g(for)f(comments)g(with)i(type)f Fq(T_comment)p +Fv(;)f(if)479 2717 y(f)o(alse,)i(such)f(nodes)f(are)h(not)g(created.)f +(\(Def)o(ault:)h(f)o(alse\))p Black 396 2825 a Ft(\225)p +Black 60 w Fq(encoding:)p Fv(Speci\002es)f(the)i(internal)e(encoding)f +(of)i(the)g(parser)-5 b(.)20 b(Most)g(strings)h(are)f(then)f +(represented)g(according)479 2933 y(to)i(this)f(encoding;)f(ho)n(we)n +(v)o(er)f(there)h(are)i(some)f(e)o(xceptions)e(\(especially)i +Fq(ext_id)f Fv(v)n(alues)h(which)g(are)g(al)o(w)o(ays)479 +3041 y(UTF-8)g(encoded\).)e(\(Def)o(ault:)h(`Enc_iso88591\))p +Black 396 3148 a Ft(\225)p Black 60 w Fq +(recognize_standalone_declaration:)c Fv(If)21 b(true)e(and)h(if)h(the)f +(parser)f(is)i(v)n(alidating,)e(the)479 3256 y Fq(standalone="yes")f +Fv(declaration)h(forces)h(that)g(it)h(is)g(check)o(ed)e(whether)g(the)h +(document)e(is)j(a)g(standalone)479 3364 
y(document.)d(-)j(If)f(f)o +(alse,)g(or)g(if)g(the)h(parser)e(is)i(in)g(well-formedness)d(mode,)h +(such)h(declarations)f(are)h(ignored.)479 3472 y(\(Def)o(ault:)g +(true\))p Black 396 3580 a Ft(\225)p Black 60 w Fq +(store_element_positions:)d Fv(If)j(true,)g(for)f(e)n(v)o(ery)g +(non-data)f(node)h(the)i(source)e(position)g(is)j(stored.)d(If)h(f)o +(alse,)479 3688 y(the)g(position)g(information)e(is)j(lost.)f(If)g(a)n +(v)n(ailable,)g(you)f(can)h(get)g(the)g(positions)g(of)g(nodes)f(by)h +(in)m(v)n(oking)e(the)479 3796 y Fq(position)i Fv(method.)e(\(Def)o +(ault:)i(true\))p Black 396 3904 a Ft(\225)p Black 60 +w Fq(idref_pass:)p Fv(If)e(true)i(and)g(if)g(there)g(is)h(an)f(ID)h +(inde)o(x,)e(the)h(parser)f(checks)h(whether)f(e)n(v)o(ery)g(IDREF)i +(or)e(IDREFS)479 4012 y(attrib)n(ute)h(refer)g(to)g(an)g(e)o(xisting)f +(node;)h(this)g(requires)g(that)g(the)g(parser)g(tra)n(v)o(erses)g(the) +g(whole)f(doument)g(tree.)h(If)479 4120 y(f)o(alse,)h(this)f(check)g +(is)h(left)f(out.)g(\(Def)o(ault:)g(f)o(alse\))p Black +396 4228 a Ft(\225)p Black 60 w Fq(validate_by_dfa:)p +Fv(If)e(true)h(and)h(if)h(the)f(content)f(model)g(for)h(an)g(element)g +(type)f(is)i(deterministic,)e(a)479 4336 y(deterministic)h(\002nite)g +(automaton)e(is)j(used)f(to)h(v)n(alidate)e(whether)g(the)i(element)e +(contents)h(match)f(the)i(content)479 4444 y(model)e(of)h(the)g(type.)g +(If)g(f)o(alse,)g(or)g(if)g(a)g(DF)-6 b(A)21 b(is)g(not)f(a)n(v)n +(ailable,)f(a)h(backtracking)e(algorithm)g(is)j(used)f(for)f(v)n +(alidation.)479 4552 y(\(Def)o(ault:)h(true\))p Black +396 4659 a Ft(\225)p Black 60 w Fq(accept_only_deterministic_models:)15 +b Fv(If)21 b(true,)e(only)h(deterministic)f(content)g(models)h(are)g +(accepted;)f(if)479 4767 y(f)o(alse,)i(an)o(y)e(syntactically)h +(correct)f(content)g(models)h(can)g(be)g(processed.)f(\(Def)o(ault:)g +(true\))p Black 3800 5278 a Fr(92)p Black eop +%%Page: 93 93 +93 92 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p 
Black -2 583 a Fp(4.4.4.)35 +b(Whic)o(h)f(con\002guration)g(should)g(I)f(use?)396 +751 y Fv(First,)21 b(I)f(recommend)e(to)i(v)n(ary)g(the)g(def)o(ault)f +(con\002guration)f(instead)i(of)g(creating)f(a)i(ne)n(w)f +(con\002guration)d(record.)i(F)o(or)396 859 y(instance,)h(to)g(set)h +Fq(idref_pass)e Fv(to)i Fq(true)p Fv(,)e(change)g(the)i(def)o(ault)e +(as)i(in:)396 1039 y Fq(let)45 b(config)e(=)i({)g(default_config)d +(with)i(idref_pass)g(=)g(true)g(})396 1230 y Fv(The)20 +b(background)d(is)k(that)f(I)h(can)f(add)f(more)h(options)f(to)h(the)g +(record)f(in)i(future)e(v)o(ersions)g(of)h(the)g(parser)f(without)396 +1338 y(breaking)g(your)f(programs.)396 1487 y Fu(Do)i(I)i(need)e(extra) +f(nodes)i(f)n(or)f(pr)o(ocessing)g(instructions?)g Fv(By)g(def)o(ault,) +g(such)g(nodes)f(are)h(not)g(created.)f(This)i(does)396 +1595 y(not)f(mean)g(that)g(the)g(processing)f(instructions)g(are)h +(lost;)h(ho)n(we)n(v)o(er)m(,)d(you)h(cannot)g(\002nd)h(out)g(the)g(e)o +(xact)g(location)f(where)396 1703 y(the)o(y)h(occur)-5 +b(.)19 b(F)o(or)h(e)o(xample,)e(the)j(follo)n(wing)d(XML)i(te)o(xt)396 +1883 y Fq()396 2074 y Fv(will)h(normally)e +(create)h(one)f(element)h(node)f(for)h Fq(x)g Fv(containing)e +Fr(one)i Fv(subnode)f(for)g Fq(y)p Fv(.)h(The)g(processing)f +(instructions)396 2182 y(are)h(attached)g(to)g Fq(x)h +Fv(in)f(a)h(separate)e(hash)h(table;)h(you)e(can)h(access)h(them)e +(using)h Fq(x)45 b(#)f(pinstr)g("pi1")20 b Fv(and)g Fq(x)44 +b(#)396 2290 y(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o(ely)-5 +b(.)18 b(The)i(information)d(is)k(lost)g(where)f(the)g(instructions)f +(occur)g(within)h Fq(x)p Fv(.)396 2439 y(If)g(the)h(option)d +Fq(enable_pinstr_nodes)g Fv(is)j(turned)e(on,)h(the)g(parser)f(creates) +i(e)o(xtra)e(nodes)g Fq(pi1)i Fv(and)e Fq(pi2)i Fv(such)f(that)396 +2547 y(the)g(subnodes)f(of)h Fq(x)h Fv(are)f(no)n(w:)396 +2728 y Fq(x)45 b(#)g(sub_nodes)e(=)i([)f(pi1;)g(y;)h(pi2)f(])396 +2919 y Fv(The)20 b(e)o(xtra)g(nodes)f(contain)g(the)h(processing)f 
+(instructions)g(in)i(the)f(usual)g(w)o(ay)-5 b(,)20 b(i.e.)g(you)f(can) +h(access)h(them)f(using)f Fq(pi1)396 3026 y(#)45 b(pinstr)f("pi1")20 +b Fv(and)f Fq(pi2)45 b(#)f(pinstr)g("pi2")p Fv(,)20 b(respecti)n(v)o +(ely)-5 b(.)396 3176 y(Note)20 b(that)h(you)e(will)i(need)e(an)i(e)o(x) +o(emplar)d(for)h(the)i(PI)f(nodes)g(\(see)g Fq(make_spec_from_alist)p +Fv(\).)396 3325 y Fu(Do)g(I)i(need)e(a)h(super)g(r)o(oot)d(node?)i +Fv(By)h(def)o(ault,)e(there)h(is)h(no)f(super)f(root)h(node.)f(The)h +Fq(document)f Fv(object)h(refers)396 3433 y(directly)g(to)g(the)g(node) +f(representing)f(the)j(root)e(element)h(of)g(the)g(document,)e(i.e.)396 +3613 y Fq(doc)45 b(#)f(root)g(=)h(r)396 3804 y Fv(if)21 +b Fq(r)f Fv(is)h(the)g(root)e(node.)g(This)h(is)i(sometimes)d(incon)m +(v)o(enient:)f(\(1\))h(Some)h(algorithms)f(become)g(simpler)h(if)g(e)n +(v)o(ery)f(node)396 3912 y(has)i(a)f(parent,)f(e)n(v)o(en)g(the)i(root) +e(node.)g(\(2\))h(Some)g(standards)f(such)h(as)h(XP)o(ath)f(call)g(the) +h("root)e(node")g(the)h(node)f(whose)396 4020 y(child)h(represents)f +(the)i(root)e(of)h(the)g(document.)e(\(3\))i(The)g(super)f(root)h(node) +f(can)h(serv)o(e)f(as)i(a)g(container)e(for)g(processing)396 +4128 y(instructions)g(outside)h(the)g(root)g(element.)f(Because)i(of)e +(these)i(reasons,)e(it)i(is)g(possible)f(to)h(create)f(an)g(e)o(xtra)f +(super)h(root)396 4236 y(node,)f(whose)h(child)g(is)h(the)f(root)g +(node:)396 4416 y Fq(doc)45 b(#)f(root)g(=)h(sr)403 b(&&)396 +4513 y(sr)45 b(#)f(sub_nodes)g(=)g([)h(r)g(])396 4704 +y Fv(When)20 b(e)o(xtra)g(nodes)f(are)h(also)h(created)e(for)h +(processing)f(instructions,)g(these)h(nodes)f(can)h(be)h(added)e(to)h +(the)g(super)g(root)396 4812 y(node)f(if)h(the)o(y)e(occur)h(outside)g +(the)g(root)g(element)g(\(reason)f(\(3\)\),)h(and)g(the)g(order)g +(re\003ects)g(the)h(order)e(in)i(the)f(source)g(te)o(xt.)p +Black 3800 5278 a Fr(93)p Black eop +%%Page: 94 94 +94 93 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i 
+(calling)f(the)h(par)o(ser)p Black 396 579 a Fv(Note)g(that)h(you)e +(will)i(need)e(an)i(e)o(x)o(emplar)d(for)h(the)i(super)e(root)h(node)f +(\(see)h Fq(make_spec_from_alist)p Fv(\).)396 728 y Fu(What)g(is)h(the) +g(effect)e(of)h(the)h(UTF-8)e(encoding?)h Fv(By)h(def)o(ault,)e(the)h +(parser)g(represents)f(strings)h(\(with)g(fe)n(w)396 +836 y(e)o(xceptions\))e(as)j(ISO-8859-1)c(strings.)i(These)h(are)g +(well-kno)n(wn,)d(and)j(there)f(are)h(tools)g(and)f(fonts)g(for)h(this) +g(encoding.)396 986 y(Ho)n(we)n(v)o(er)m(,)e(internationalization)g +(may)h(require)g(that)i(you)e(switch)h(o)o(v)o(er)f(to)i(UTF-8)e +(encoding.)f(In)i(most)396 1094 y(en)m(vironments,)d(the)k(immediate)e +(ef)n(fect)h(will)g(be)h(that)f(you)f(cannot)g(read)h(strings)g(with)g +(character)f(codes)h(>=)h(160)e(an)o(y)396 1202 y(longer;)g(your)g +(terminal)h(will)h(only)e(sho)n(w)h(funn)o(y)e(glyph)h(combinations.)f +(It)i(is)h(strongly)e(recommended)e(to)k(install)396 +1310 y(Unicode)e(fonts)h(\(GNU)g(Unifont)f +(\(http://czyborra.com/unifon)o(t/\),)c(Markus)k(K)o(uhn')-5 +b(s)19 b(fonts)396 1417 y(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/do) +m(wnlo)o(ad/u)o(cs-fo)o(nts.tar)g(.g)o(z\)\))14 b(and)20 +b(terminal)f(emulators)h(that)g(can)g(handle)396 1525 +y(UTF-8)g(byte)g(sequences)f(\(http://myweb)m(.clark.net/pub/d)o(ick)o +(e)o(y)o(/xter)o(m/x)o(term.)o(html\))o(.)c(Furthermore,)i(a)k(Unicode) +396 1633 y(editor)f(may)f(be)i(helpful)e(\(such)g(as)i(Y)-9 +b(udit)20 b(\(ftp://metalab)m(.unc.edu/pub)o(/Linu)o(x/ap)o(ps/ed)o +(itors/X/\)\))o(.)15 b(There)k(are)h(also)396 1741 y(F)-6 +b(A)h(Q)21 b(\(http://www)-5 b(.cl.cam.ac.uk/~mgk25)o(/unico)o(de)o +(.htm)o(l\))15 b(by)20 b(Markus)f(K)o(uhn.)396 1891 y(By)i(setting)f +Fq(encoding)f Fv(to)i Fq(`Enc_utf8)e Fv(all)i(strings)f(originating)e +(from)h(the)i(parsed)e(XML)h(document)e(are)396 1999 +y(represented)h(as)i(UTF-8)e(strings.)h(This)h(includes)e(not)h(only)f +(character)g(data)h(and)g(attrib)n(ute)g(v)n(alues)g(b)n(ut)g(also)g +(element)396 
2107 y(names,)g(attrib)n(ute)g(names)g(and)f(so)i(on,)e +(as)i(it)g(is)g(possible)f(to)h(use)f(an)o(y)f(Unicode)g(letter)i(to)f +(form)f(such)h(names.)g(Strictly)396 2214 y(speaking,)f(PXP)i(is)g +(only)e(XML-compliant)f(if)j(the)f(UTF-8)g(mode)f(is)i(used;)f +(otherwise)g(it)h(will)g(ha)n(v)o(e)e(dif)n(\002culties)396 +2322 y(when)h(v)n(alidating)f(documents)f(containing)g +(non-ISO-8859-1-names.)396 2472 y(This)j(mode)e(does)h(not)g(ha)n(v)o +(e)f(an)o(y)h(impact)f(on)h(the)g(e)o(xternal)f(representation)f(of)i +(documents.)f(The)g(character)g(set)396 2580 y(assumed)h(when)g +(reading)e(a)j(document)d(is)j(set)g(in)g(the)f(XML)g(declaration,)e +(and)i(character)f(set)i(when)e(writing)h(a)396 2688 +y(document)e(must)j(be)f(passed)g(to)g(the)g Fq(write)g +Fv(method.)396 2837 y Fu(Ho)o(w)g(do)h(I)g(check)f(that)g(nodes)h +(exist)f(which)h(ar)o(e)e(r)o(eferr)o(ed)g(by)i(IDREF)g(attrib)n(utes?) +e Fv(First,)i(you)e(must)h(create)g(an)396 2945 y(inde)o(x)f(of)h(all)h +(occurring)d(ID)i(attrib)n(utes:)396 3125 y Fq(let)45 +b(index)f(=)g(new)h(hash_index)396 3316 y Fv(This)21 +b(inde)o(x)e(must)h(be)g(passed)g(to)g(the)h(parsing)e(function:)396 +3496 y Fq(parse_document_entity)486 3593 y(~id_index:\(index)42 +b(:>)j(index\))486 3691 y(config)f(source)g(spec)396 +3882 y Fv(Ne)o(xt,)20 b(you)f(must)h(turn)g(on)g(the)g +Fq(idref_pass)f Fv(mode:)396 4062 y Fq(let)45 b(config)e(=)i({)g +(default_config)d(with)i(idref_pass)g(=)g(true)g(})396 +4253 y Fv(Note)20 b(that)h(no)n(w)e(the)i(whole)e(document)f(tree)j +(will)g(be)f(tra)n(v)o(ersed,)f(and)g(e)n(v)o(ery)g(node)g(will)i(be)f +(check)o(ed)f(for)h(IDREF)g(and)396 4361 y(IDREFS)h(attrib)n(utes.)f +(If)g(the)g(tree)g(is)h(big,)f(this)h(may)f(tak)o(e)g(some)g(time.)396 +4510 y Fu(What)g(ar)o(e)g(deterministic)g(content)g(models?)g +Fv(These)g(type)g(of)g(models)g(can)g(speed)f(up)h(the)g(v)n(alidation) +f(checks;)396 4618 y(furthermore)f(the)o(y)h(ensure)g +(SGML-compatibility)-5 b(.)18 b(In)i(particular)m(,)e(a)j(content)e 
+(model)g(is)i(deterministic)e(if)i(the)f(parser)396 4726 +y(can)g(determine)f(the)h(actually)g(used)g(alternati)n(v)o(e)f(by)g +(inspecting)g(only)h(the)g(current)f(tok)o(en.)g(F)o(or)h(e)o(xample,)e +(this)396 4834 y(element)i(has)g(non-deterministic)e(contents:)p +Black 3800 5278 a Fr(94)p Black eop +%%Page: 95 95 +95 94 bop Black 2348 67 a Fr(Chapter)20 b(4.)g(Con\002guring)e(and)i +(calling)f(the)h(par)o(ser)p Black 396 579 a Fq()396 770 y Fv(If)20 +b(the)h(\002rst)f(element)g(in)g Fq(x)h Fv(is)g Fq(u)p +Fv(,)f(the)h(parser)e(does)h(not)g(kno)n(w)f(which)h(of)g(the)g +(alternati)n(v)o(es)f Fq(\(u,v\))h Fv(or)g Fq(\(u,y+\))g +Fv(will)396 878 y(w)o(ork;)g(the)g(parser)g(must)g(also)g(inspect)g +(the)h(second)e(element)g(to)i(be)f(able)g(to)g(distinguish)g(between)f +(the)h(alternati)n(v)o(es.)396 986 y(Because)h(such)f(look-ahead)d +(\(or)j("guessing"\))e(is)k(required,)c(this)i(e)o(xample)f(is)i +(non-deterministic.)396 1135 y(The)f(XML)g(standard)f(demands)g(that)i +(content)e(models)g(must)i(be)f(deterministic.)f(So)h(it)h(is)g +(recommended)c(to)k(turn)e(the)396 1243 y(option)g Fq +(accept_only_deterministic_models)d Fv(on;)j(ho)n(we)n(v)o(er)m(,)f +(PXP)j(can)f(also)h(process)e(non-deterministic)396 1351 +y(models)h(using)g(a)g(backtracking)e(algorithm.)396 +1500 y(Deterministic)i(models)g(ensure)f(that)h(v)n(alidation)f(can)h +(be)g(performed)e(in)i(linear)g(time.)g(In)g(order)f(to)h(get)g(the)396 +1608 y(maximum)f(bene\002ts,)h(PXP)h(also)f(implements)f(a)i(special)f +(v)n(alidator)f(that)h(pro\002ts)g(from)f(deterministic)h(models;)f +(this)396 1716 y(is)i(the)g(deterministic)e(\002nite)h(automaton)f +(\(DF)-6 b(A\).)19 b(This)i(v)n(alidator)d(is)k(enabled)d(per)g +(element)h(type)g(if)g(the)g(element)396 1824 y(type)g(has)g(a)h +(deterministic)e(model)h(and)f(if)i(the)f(option)f Fq(validate_by_dfa)f +Fv(is)j(turned)e(on.)396 1974 y(In)h(general,)f(I)h(e)o(xpect)g(that)g +(the)g(DF)-6 b(A)21 b(method)e(is)i(f)o(aster)f(than)g(the)g 
+(backtracking)e(method;)g(especially)i(in)h(the)f(w)o(orst)396 +2082 y(case)h(the)f(DF)-6 b(A)21 b(tak)o(es)f(only)g(linear)f(time.)i +(Ho)n(we)n(v)o(er)m(,)d(if)i(the)g(content)g(model)f(has)h(only)g(fe)n +(w)g(alternati)n(v)o(es)f(and)h(the)396 2190 y(alternati)n(v)o(es)f(do) +h(not)g(nest,)g(the)h(backtracking)c(algorithm)i(may)g(be)i(better)-5 +b(.)-2 2691 y Fx(4.5.)39 b(Updates)396 2871 y Fr(Some)20 +b(\(often)f(later)i(added\))d(featur)m(es)i(that)g(ar)m(e)h(otherwise)f +(not)g(e)n(xplained)f(in)h(the)h(manual)d(b)n(ut)j(worth)f(to)g(be)396 +2979 y(mentioned.)p Black 396 3211 a Ft(\225)p Black +60 w Fv(Methods)g(node_position,)d(node_path,)g(nth_node,)h(pre)n +(vious_node,)e(ne)o(xt_node)h(for)j(nodes:)f(See)479 +3319 y(pxp_document.mli)p Black 396 3427 a Ft(\225)p +Black 60 w Fv(Functions)h(to)g(determine)f(the)h(document)e(order)h(of) +h(nodes:)f(compare,)g(create_ord_inde)o(x,)c(ord_number)m(,)479 +3535 y(ord_compare:)i(See)k(pxp_document.mli)p Black +3800 5278 a Fr(95)p Black eop +%%Page: 96 96 +96 95 bop Black Black Black Black eop +%%Trailer +end +userdict /end-hook known{end-hook}if +%%EOF diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent new file mode 100644 index 000000000..f2e0eb85c --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/dtd.mli.ent @@ -0,0 +1,374 @@ + + diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml b/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml new file mode 100755 index 000000000..4db669036 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/getcode.ml @@ -0,0 +1,56 @@ +#! 
/bin/sh +# (* +exec ocamlfattop "$0" +*) directory ".";; + +open Str;; + +let name_re = regexp "(\\*\\$[ \t]*\\([a-zA-Z0-9.-]*\\)[ \t]*\\*)";; +let subst_re = regexp "[<>&'%]";; + +let begin_entity name = + "\n" +;; + + +let text = ref "" in +let within_entity = ref false in +try + while true do + let line = read_line() in + if string_match name_re line 0 then begin + let name = matched_group 1 line in + if !within_entity then + text := !text ^ "\n" ^ end_entity(); + within_entity := false; + if name <> "-" then begin + text := !text ^ begin_entity name; + within_entity := true + end + end + else + if !within_entity then begin + let line' = + global_substitute subst_re + (fun s -> + let s' = matched_group 0 s in + match s' with + "<" -> "<" + | ">" -> ">" + | "&" -> "&" + | "'" -> "'" + | "%" -> "&percent;" + | _ -> assert false) + line + in + text := !text ^ "\n" ^ line' + end + done; +with End_of_file -> + if !within_entity then + text := !text ^ "\n" ^ end_entity(); + print_string !text +;; diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css new file mode 100644 index 000000000..67dfaecb7 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.css @@ -0,0 +1,4 @@ +.acronym { + font-weight: bold; + color: #c71585 +} diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl new file mode 100644 index 000000000..cd9b1e2bf --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.dsl @@ -0,0 +1,74 @@ + + + + + +]]> + + +]]> +]> + + + + +;; HTML: + + + +;; printing: + + + +;; both: + +(define %section-autolabel% + ;; Are sections enumerated? 
+ #t) + + + + + diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml new file mode 100644 index 000000000..1cb2064cb --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/markup.sgml @@ -0,0 +1,5109 @@ +PXP"> +PXP"> + + + + + +%readme.code.to-html; +%get.markup-yacc.mli; +%get.markup-dtd.mli; + + + +]> + + + + + The PXP user's guide + + + + + Gerd + Stolpmann + + +
+ gerd@gerd-stolpmann.de +
+
+
+
+
+ + + 1999, 2000Gerd Stolpmann + + + + + +&markup; is a validating parser for XML-1.0 which has been +written entirely in Objective Caml. + + + Download &markup;: + +The free &markup; library can be downloaded at + +http://www.ocaml-programming.de/packages/ +. This user's guide is included. +Newest releases of &markup; will be announced in +The OCaml Link +Database. + + + + + + License + +This document, and the described software, "&markup;", are copyright by +Gerd Stolpmann. + + + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this document and the "&markup;" software (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + + +The Software is provided ``as is'', without warranty of any kind, express +or implied, including but not limited to the warranties of +merchantability, fitness for a particular purpose and noninfringement. +In no event shall Gerd Stolpmann be liable for any claim, damages or +other liability, whether in an action of contract, tort or otherwise, +arising from, out of or in connection with the Software or the use or +other dealings in the software. + + + +
+ + + + + + User's guide + + + What is XML? + + + Introduction + + XML (short for Extensible Markup Language) +generalizes the idea that text documents are typically structured in sections, +sub-sections, paragraphs, and so on. The format of the document is not fixed +(as, for example, in HTML), but can be declared by a so-called DTD (document +type definition). The DTD describes only the rules for how the document can be +structured, but not how the document can be processed. For example, if you want +to publish a book that uses XML markup, you will need a processor that converts +the XML file into a printable format such as Postscript. On the one hand, the +structure of XML documents is configurable; on the other hand, there is no +longer a canonical interpretation of the elements of the document; for example +one XML DTD might require that paragraphs be delimited by +para tags, while another DTD expects p tags +for the same purpose. As a result, for every DTD a new processor is required. + + + +Although XML can be used to express structured text documents it is not limited +to this kind of application. For example, XML can also be used to exchange +structured data over a network, or to simply store structured data in +files. Note that XML documents cannot contain arbitrary binary data because +some characters are forbidden; for some applications you need to encode binary +data as text (e.g. the base 64 encoding). + + + + + The "hello world" example + +The following example shows a very simple DTD, and a corresponding document +instance. The document is structured such that it consists of sections, and +that sections consist of paragraphs, and that paragraphs contain plain text: + + + + + + +]]> + + + The following document is an instance of this DTD: + + + + + +
+ This is a paragraph of the first section. + This is another paragraph of the first section. +
+
+ This is the only paragraph of the second section. +
+
+]]> +
+ + As in HTML (and, of course, in grand-father SGML), the "pieces" of +the document are delimited by element braces, i.e. such a piece begins with +<name-of-the-type-of-the-piece> and ends with +</name-of-the-type-of-the-piece>, and the pieces are +called elements. Unlike HTML and SGML, both start tags and +end tags (i.e. the delimiters written in angle brackets) can never be left +out. For example, HTML calls the paragraphs simply p, and +because paragraphs never contain paragraphs, a sequence of several paragraphs +can be written as: + +First paragraph +

Second paragraph]]> + +This is not possible in XML; continuing our example above we must always write + +First paragraph +Second paragraph]]> + +The rationale behind that is to (1) simplify the development of XML parsers +(you need not convert the DTD into a deterministic finite automaton which is +required to detect omitted tags), and to (2) make it possible to parse the +document independent of whether the DTD is known or not. + + + +The first line of our sample document, + + +]]> + + +is the so-called XML declaration. It expresses that the +document follows the conventions of XML version 1.0, and that the document is +encoded using characters from the ISO-8859-1 character set (often known as +"Latin 1", mostly used in Western Europe). Although the XML declaration is not +mandatory, it is good style to include it; everybody sees at the first glance +that the document uses XML markup and not the similar-looking HTML and SGML +markup languages. If you omit the XML declaration, the parser will assume +that the document is encoded as UTF-8 or UTF-16 (there is a rule that makes +it possible to distinguish between UTF-8 and UTF-16 automatically); these +are encodings of Unicode's universal character set. (Note that &pxp;, unlike its +predecessor "Markup", fully supports Unicode.) + + + +The second line, + + +]]> + + +names the DTD that is going to be used for the rest of the document. In +general, it is possible that the DTD consists of two parts, the so-called +external and the internal subset. "External" means that the DTD exists as a +second file; "internal" means that the DTD is included in the same file. In +this example, there is only an external subset, and the system identifier +"simple.dtd" specifies where the DTD file can be found. 
System identifiers are +interpreted as URLs; for instance this would be legal: + + +]]> + + +Please note that &pxp; cannot interpret HTTP identifiers by default, but it is +possible to change the interpretation of system identifiers. + + + +The word immediately following DOCTYPE determines which of +the declared element types (here "document", "section", and "paragraph") is +used for the outermost element, the root element. In this +example it is document because the outermost element is +delimited by <document> and +</document>. + + + +The DTD consists of three declarations for element types: +document, section, and +paragraph. Such a declaration has two parts: + + +<!ELEMENT name content-model> + + +The content model is a regular expression which describes the possible inner +structure of the element. Here, document contains one or +more sections, and a section contains one or more +paragraphs. Note that these two element types are not allowed to contain +arbitrary text. Only the paragraph element type is declared +such that parsed character data (indicated by the symbol +#PCDATA) is permitted. + + + +See below for a detailed discussion of content models. + + + + + XML parsers and processors + +XML documents are human-readable, but this is not the main purpose of this +language. XML has been designed such that documents can be read by a program +called an XML parser. The parser checks that the document +is well-formatted, and it represents the document as objects of the programming +language. There are two aspects when checking the document: First, the document +must follow some basic syntactic rules, such as that tags are written in angle +brackets, that for every start tag there must be a corresponding end tag and so +on. A document respecting these rules is +well-formed. Second, the document must match the DTD in +which case the document is valid. 
Many parsers check only +on well-formedness and ignore the DTD; &pxp; is designed such that it can +even validate the document. + + + +A parser does not make a sensible application, it only reads XML +documents. The whole application working with XML-formatted data is called an +XML processor. Often XML processors convert documents into +another format, such as HTML or Postscript. Sometimes processors extract data +of the documents and output the processed data again XML-formatted. The parser +can help the application processing the document; for example it can provide +means to access the document in a specific manner. &pxp; supports an +object-oriented access layer specially. + + + + + Discussion + +As we have seen, there are two levels of description: On the one hand, XML can +define rules about the format of a document (the DTD), on the other hand, XML +expresses structured documents. There are a number of possible applications: + + + + + +XML can be used to express structured texts. Unlike HTML, there is no canonical +interpretation; one would have to write a backend for the DTD that translates +the structured texts into a format that existing browsers, printers +etc. understand. The advantage of a self-defined document format is that it is +possible to design the format in a more problem-oriented way. For example, if +the task is to extract reports from a database, one can use a DTD that reflects +the structure of the report or the database. A possible approach would be to +have an element type for every database table and for every column. Once the +DTD has been designed, the report procedure can be splitted up in a part that +selects the database rows and outputs them as an XML document according to the +DTD, and in a part that translates the document into other formats. Of course, +the latter part can be solved in a generic way, e.g. there may be configurable +backends for all DTDs that follow the approach and have element types for +tables and columns. 
+ + + +XML plays the role of a configurable intermediate format. The database +extraction function can be written without having to know the details of +typesetting; the backends can be written without having to know the details of +the database. + + + +Of course, there are traditional solutions. One can define an ad hoc +intermediate text file format. The disadvantage is that there are no names for +the pieces of the format, and that such formats usually lack documentation +because of this. Another solution would be to have a binary representation, +either as language-dependent or language-independent structure (examples of the +latter can be found in RPC implementations). The disadvantage is that it is +harder to view such representations, one has to write pretty printers for this +purpose. It is also more difficult to enter test data; XML is plain text that +can be written using an arbitrary editor (Emacs even has a good XML mode, +PSGML). All these alternatives suffer from a missing structure checker, +i.e. the programs processing these formats usually do not check the input file +or input object in detail; XML parsers check the syntax of the input (the +so-called well-formedness check), and the advanced parsers like &markup; even +verify that the structure matches the DTD (the so-called validation). + + + + + + +XML can be used as a configurable communication language. A fundamental problem +of every communication is that sender and receiver must follow the same +conventions about the language. For data exchange, the question is usually +which data records and fields are available, how they are syntactically +composed, and which values are possible for the various fields. Similar +questions arise for text document exchange. 
XML does not answer these problems +completely, but it reduces the number of ambiguities for such conventions: The +outlines of the syntax are specified by the DTD (but not necessarily the +details), and XML introduces canonical names for the components of documents +such that it is simpler to describe the rest of the syntax and the semantics +informally. + + + + + +XML is a data storage format. Currently, every software product tends to use +its own way to store data; commercial software often does not describe such +formats, and it is a pain to integrate such software into a bigger project. +XML can help to improve this situation when several applications share the same +syntax of data files. DTDs are then neutral instances that check the format of +data files independent of applications. + + + + + + + + + + + + + Highlights of XML + + +This section explains many of the features of XML, but not all, and some +features not in detail. For a complete description, see the XML +specification. + + + + The DTD and the instance + +The DTD contains various declarations; in general you can only use a feature if +you have previously declared it. The document instance file may contain the +full DTD, but it is also possible to split the DTD into an internal and an +external subset. A document must begin as follows if the full DTD is included: + + +<?xml version="1.0" encoding="Your encoding"?> +<!DOCTYPE root [ + Declarations +]> + + +These declarations are called the internal subset. Note +that the usage of entities and conditional sections is restricted within the +internal subset. + + +If the declarations are located in a different file, you can refer to this file +as follows: + + +<?xml version="1.0" encoding="Your encoding"?> +<!DOCTYPE root SYSTEM "file name"> + + +The declarations in the file are called the external +subset. The file name is called the system +identifier. 
+It is also possible to refer to the file by a so-called +public identifier, but most XML applications won't use +this feature. + + +You can also specify both internal and external subsets. In this case, the +declarations of both subsets are mixed, and if there are conflicts, the +declaration of the internal subset overrides those of the external subset with +the same name. This looks as follows: + + +<?xml version="1.0" encoding="Your encoding"?> +<!DOCTYPE root SYSTEM "file name" [ + Declarations +]> + + + + +The XML declaration (the string beginning with <?xml and +ending at ?>) should specify the encoding of the +file. Common values are UTF-8, and the ISO-8859 series of character sets. Note +that every file parsed by the XML processor can begin with an XML declaration +and that every file may have its own encoding. + + + +The name of the root element must be mentioned directly after the +DOCTYPE string. This means that a full document instance +looks like + + +<?xml version="1.0" encoding="Your encoding"?> +<!DOCTYPE root SYSTEM "file name" [ + Declarations +]> + +<root> + inner contents +</root> + + + + + + + + Reserved characters + +Some characters are generally reserved to indicate markup such that they cannot +be used for character data. These characters are <, >, and +&. Furthermore, single and double quotes are sometimes reserved. If you +want to include such a character as character, write it as follows: + + + + +&lt; instead of < + + + + +&gt; instead of > + + + + +&amp; instead of & + + + + +&apos; instead of ' + + + + +&quot; instead of " + + + + +All other characters are free in the document instance. It is possible to +include a character by its position in the Unicode alphabet: + + +&#n; + + +where n is the decimal number of the +character. Alternatively, you can specify the character by its hexadecimal +number: + + +&#xn; + + +In the scope of declarations, the character % is no longer free. 
To include it +as character, you must use the notations &#37; or +&#x25;. + + + Note that besides &lt;, &gt;, &amp;, +&apos;, and &quot; there are no predefines character entities. This is +different from HTML which defines a list of characters that can be referenced +by name (e.g. &auml; for ä); however, if you prefer named characters, you +can declare such entities yourself (see below). + + + + + + + Elements and ELEMENT declarations + + +Elements structure the document instance in a hierarchical way. There is a +top-level element, the root element, which contains a +sequence of inner elements and character sections. The inner elements are +structured in the same way. Every element has an element +type. The beginning of the element is indicated by a start +tag, written + + +<element-type> + + +and the element continues until the corresponding end tag +is reached: + + +</element-type> + + +In XML, it is not allowed to omit start or end tags, even if the DTD would +permit this. Note that there are no special rules how to interpret spaces or +newlines near start or end tags; all spaces and newlines count. + + + +Every element type must be declared before it can be used. The declaration +consists of two parts: the ELEMENT declaration describes the content model, +i.e. which inner elements are allowed; the ATTLIST declaration describes the +attributes of the element. + + + +An element can simply allow everything as content. This is written: + + +<!ELEMENT name ANY> + + +On the opposite, an element can be forced to be empty; declared by: + + +<!ELEMENT name EMPTY> + + +Note that there is an abbreviated notation for empty element instances: +<name/>. + + + +There are two more sophisticated forms of declarations: so-called +mixed declarations, and regular +expressions. An element with mixed content contains character data +interspersed with inner elements, and the set of allowed inner elements can be +specified. 
In contrast to this, a regular expression declaration does not allow +character data, but the inner elements can be described by the more powerful +means of regular expressions. + + + +A declaration for mixed content looks as follows: + + +<!ELEMENT name (#PCDATA | element1 | ... | elementn )*> + + +or if you do not want to allow any inner element, simply + + +<!ELEMENT name (#PCDATA)> + + + + +

+ Example + +If element type q is declared as + + +]]> + + +this is a legal instance: + + +This is character datawith inner elements]]> + + +But this is illegal because t has not been enumerated in the +declaration: + + +This is character datawith inner elements]]> + + +
+ + +The other form uses a regular expression to describe the possible contents: + + +<!ELEMENT name regexp> + + +The following well-known regexp operators are allowed: + + + + +element-name + + + + + +(subexpr1 , ... , subexprn ) + + + + + +(subexpr1 | ... | subexprn ) + + + + + +subexpr* + + + + + +subexpr+ + + + + + +subexpr? + + + + +The , operator indicates a sequence of sub-models, the +| operator describes alternative sub-models. The +* indicates zero or more repetitions, and ++ one or more repetitions. Finally, ? can +be used for optional sub-models. As atoms the regexp can contain names of +elements; note that it is not allowed to include #PCDATA. + + + +The exact syntax of the regular expressions is rather strange. This can be +explained best by a list of constraints: + + + + +The outermost expression must not be +element-name. + + Illegal: +]]>; this must be written as +]]>. + + + +For the unary operators subexpr*, +subexpr+, and +subexpr?, the +subexpr must not itself be a +unary operator. + + Illegal: +]]>; this must be written as +]]>. + + + +Between ) and one of the unary operators +*, +, or ?, there must +not be whitespace. + Illegal: +]]>; this must be written as +]]>. + + There is the additional constraint that the +right parenthesis must be contained in the same entity as the left parenthesis; +see the section about parsed entities below. + + + + + + +Note that there is another restriction on regular expressions: they must be +deterministic. This means that the parser must be able to see by looking at the +next token which alternative is actually used, or whether the repetition +stops. The reason for this is simply compatibility with SGML (there is no +intrinsic reason for this rule; XML can live without this restriction). + + +
+ Example + +The elements are declared as follows: + + + + + + +]]> + +This is a legal instance: + + +Some characters]]> + + +(Note: <s/> is an abbreviation for +<s></s>.) + +It would be illegal to leave ]]> out because at +least one instance of s or t must be +present. It would be illegal, too, if characters existed outside the +r element; the only exception is white space. -- This is +legal, too: + + +]]> + + +
+ +
+ + + + + Attribute lists and ATTLIST declarations + +Elements may have attributes. These are put into the start tag of an element as +follows: + + +<element-name attribute1="value1" ... attributen="valuen"> + + +Instead of +"valuek" +it is also possible to use single quotes as in +'valuek'. +Note that you cannot use double quotes literally within the value of the +attribute if double quotes are the delimiters; the same applies to single +quotes. You can generally not use < and & as characters in attribute +values. It is possible to include the paraphrases &lt;, &gt;, +&amp;, &apos;, and &quot; (and any other reference to a general +entity as long as the entity is not defined by an external file) as well as +&#n;. + + + +Before you can use an attribute you must declare it. An ATTLIST declaration +looks as follows: + + +<!ATTLIST element-name + attribute-name attribute-type attribute-default + ... + attribute-name attribute-type attribute-default +> + + +There are a lot of types, but most important are: + + + + +CDATA: Every string is allowed as attribute value. + + + + +NMTOKEN: Every nametoken is allowed as attribute +value. Nametokens consist (mainly) of letters, digits, ., :, -, _ in arbitrary +order. + + + + +NMTOKENS: A space-separated list of nametokens is allowed as +attribute value. + + + + +The most interesting default declarations are: + + + + +#REQUIRED: The attribute must be specified. + + + + +#IMPLIED: The attribute can be specified but also can be +left out. The application can find out whether the attribute was present or +not. + + + + +"value" or +'value': This particular value is +used as default if the attribute is omitted in the element. + + + + + +
+ Example + +This is a valid attribute declaration for element type r: + + + +]]> + +This means that x is a required attribute that cannot be +left out, while y and z are optional. The +XML parser tells the application whether y is present or +not, but if z is missing the default value +"one two three" is returned automatically. + + + +This is a valid example of these attributes: + + +]]> + + +
+ +
+ + + Parsed entities + +Elements describe the logical structure of the document, while +entities determine the physical structure. Entities are +the pieces of text the parser operates on, mostly files and macros. Entities +may be parsed in which case the parser reads the text and +interprets it as XML markup, or unparsed which simply +means that the data of the entity has a foreign format (e.g. a GIF icon). + + + If the parsed entity is going to be used as part of the DTD, it +is called a parameter entity. You can declare a parameter +entity with a fixed text as content by: + + +<!ENTITY % name "value"> + + +Within the DTD, you can refer to this entity, i.e. read +the text of the entity, by: + + +%name; + + +Such entities behave like macros, i.e. when they are referred to, the +macro text is inserted and read instead of the original text. + +
+ Example + +For example, you can declare two elements with the same content model by: + + + + + +]]> + + + +
+ +If the contents of the entity are given as a string constant, the entity is +called an internal entity. It is also possible to name a +file to be used as content (an external entity): + + +<!ENTITY % name SYSTEM "file name"> + + +There are some restrictions for parameter entities: + + + + +If the internal parameter entity contains the first token of a declaration +(i.e. <!), it must also contain the last token of the +declaration, i.e. the >. This means that the entity +either contains a whole number of complete declarations, or some text from the +middle of one declaration. + +Illegal: + +"> + Because <! is contained in the main +entity, and the corresponding > is contained in the +entity e. + + + +If the internal parameter entity contains a left parenthesis, it must also +contain the corresponding right parenthesis. + +Illegal: + + + +]]> Because ( is contained in the entity +e, and the corresponding ) is +contained in the main entity. + + + +When reading text from an entity, the parser automatically inserts one space +character before the entity text and one space character after the entity +text. However, this rule is not applied within the definition of another +entity. +Legal: + + + +]]> Because %suffix; is referenced within +the definition text for iconfile, no additional spaces are +added. + +Illegal: + + + +]]> +Because %suffix; is referenced outside the definition +text of another entity, the parser replaces %suffix; by +spacetestspace. +Illegal: + + + +]]> Because there is whitespace between ) +and *, which is illegal. + + + +An external parameter entity must always consist of a whole number of complete +declarations. + + + + +In the internal subset of the DTD, a reference to a parameter entity (internal +or external) is only allowed at positions where a new declaration can start. + + + +
+ + +If the parsed entity is going to be used in the document instance, it is called +a general entity. Such entities can be used as +abbreviations for frequent phrases, or to include external files. Internal +general entities are declared as follows: + + +<!ENTITY name "value"> + + +External general entities are declared this way: + + +<!ENTITY name SYSTEM "file name"> + + +References to general entities are written as: + + +&name; + + +The main difference between parameter and general entities is that the former +are only recognized in the DTD and that the latter are only recognized in the +document instance. As the DTD is parsed before the document, the parameter +entities are expanded first; for example it is possible to use the content of a +parameter entity as the name of a general entity: +&#38;%name;;This construct is only +allowed within the definition of another entity; otherwise extra spaces would +be added (as explained above). Such indirection is not recommended. + +Complete example: + + + + + +]]> +You can now write &text; in the document instance, and +depending on the value of variant either +text-a or text-b is inserted. +. + + +General entities must respect the element hierarchy. This means that there must +be an end tag for every start tag in the entity value, and that end tags +without corresponding start tags are not allowed. + + +
+ Example + +If the author of a document changes sometimes, it is worthwhile to set up a +general entity containing the names of the authors. If the author changes, you +need only to change the definition of the entity, and do not need to check all +occurrences of authors' names: + + + +]]> + + +In the document text, you can now refer to the author names by writing +&authors;. + + + +Illegal: +The following two entities are illegal because the elements in the definition +do not nest properly: + + +"> +"> +]]> + +
+ + +Earlier in this introduction we explained that there are substitutes for +reserved characters: &lt;, &gt;, &amp;, &apos;, and +&quot;. These are simply predefined general entities; note that they are +the only predefined entities. It is allowed to define these entities again +as long as the meaning is unchanged. + + + + + Notations and unparsed entities + +Unparsed entities have a foreign format and can thus not be read by the XML +parser. Unparsed entities are always external. The format of an unparsed entity +must have been declared, such a format is called a +notation. The entity can then be declared by referring to +this notation. As unparsed entities do not contain XML text, it is not possible +to include them directly into the document; you can only declare attributes +such that names of unparsed entities are acceptable values. + + + +As you can see, unparsed entities are too complicated in order to have any +purpose. It is almost always better to simply pass the name of the data file as +normal attribute value, and let the application recognize and process the +foreign format. + + + + + + + + + + + A complete example: The <emphasis>readme</emphasis> DTD + +The reason for readme was that I often wrote two versions +of files such as README and INSTALL which explain aspects of a distributed +software archive; one version was ASCII-formatted, the other was written in +HTML. Maintaining both versions means double amount of work, and changes +of one version may be forgotten in the other version. To improve this situation +I invented the readme DTD which allows me to maintain only +one source written as XML document, and to generate the ASCII and the HTML +version from it. + + + +In this section, I explain only the DTD. The readme DTD is +contained in the &markup; distribution together with the two converters to +produce ASCII and HTML. Another section of this manual describes the HTML +converter. 
+ + + +The documents have a simple structure: There are up to three levels of nested +sections, paragraphs, item lists, footnotes, hyperlinks, and text emphasis. The +outermost element has usually the type readme, it is +declared by + + + + +]]> + +This means that this element contains one or more sections of the first level +(element type sect1), and that the element has a required +attribute title containing character data (CDATA). Note that +readme elements must not contain text data. + + + +The three levels of sections are declared as follows: + + + + + + + +]]> + +Every section has a title element as first subelement. After +the title an arbitrary but non-empty sequence of inner sections, paragraphs and +item lists follows. Note that the inner sections must belong to the next higher +section level; sect3 elements must not contain inner +sections because there is no next higher level. + + + +Obviously, all three declarations allow paragraphs (p) and +item lists (ul). The definition can be simplified at this +point by using a parameter entity: + + + + + + + + + +]]> + +Here, the entity p.like is nothing but a macro abbreviating +the same sequence of declarations; if new elements on the same level as +p and ul are later added, it is +sufficient only to change the entity definition. Note that there are some +restrictions on the usage of entities in this context; most important, entities +containing a left paranthesis must also contain the corresponding right +paranthesis. + + + +Note that the entity p.like is a +parameter entity, i.e. the ENTITY declaration contains a +percent sign, and the entity is referred to by +%p.like;. This kind of entity must be used to abbreviate +parts of the DTD; the general entities declared without +percent sign and referred to as &name; are not allowed +in this context. + + + +The title element specifies the title of the section in +which it occurs. 
The title is given as character data, optionally interspersed +with line breaks (br): + + + +]]> + +Compared with the title attribute of +the readme element, this element allows inner markup +(i.e. br) while attribute values do not: It is an error if +an attribute value contains the left angle bracket < literally such that it +is impossible to include inner elements. + + + +The paragraph element p has a structure similar to +title, but it allows more inner elements: + + + + + +]]> + +Line breaks do not have inner structure, so they are declared as being empty: + + + +]]> + +This means that really nothing is allowed within br; you +must always write
]]>
or abbreviated +]]>. +
+ + +Code samples should be marked up by the code tag; emphasized +text can be indicated by em: + + + + + +]]> + +That code elements are not allowed to contain further markup +while em elements do is a design decision by the author of +the DTD. + + + +Unordered lists simply consist of one or more list items, and a list item may +contain paragraph-level material: + + + + + +]]> + +Footnotes are described by the text of the note; this text may contain +text-level markup. There is no mechanism to describe the numbering scheme of +footnotes, or to specify how footnote references are printed. + + + +]]> + +Hyperlinks are written as in HTML. The anchor tag contains the text describing +where the link points to, and the href attribute is the +pointer (as URL). There is no way to describe locations of "hash marks". If the +link refers to another readme document, the attribute +readmeref should be used instead of href. +The reason is that the converted document has usually a different system +identifier (file name), and the link to a converted document must be +converted, too. + + + + +]]> + +Note that although it is only sensible to specify one of the two attributes, +the DTD has no means to express this restriction. + + + +So far the DTD. Finally, here is a document for it: + + + + + + + Usage +

+ The readme converter is invoked on the command line by: +

+

+ readme [ -text | -html ] input.xml +

+

+ Here a list of options: +

+
    +
  • +

    -text: specifies that ASCII output should be produced

    +
  • +
  • +

    -html: specifies that HTML output should be produced

    +
  • +
+

+ The input file must be given on the command line. The converted output is + printed to stdout. +

+
+ + Author +

+ The program has been written by + Gerd Stolpmann. +

+
+
+]]>
+ +
+ + +
+ + + + + + Using &markup; + + + Validation + +The parser can be used to validate a document. This means +that all the constraints that must hold for a valid document are actually +checked. Validation is the default mode of &markup;, i.e. every document is +validated while it is being parsed. + + + +In the examples directory of the distribution you find the +pxpvalidate application. It is invoked in the following way: + + +pxpvalidate [ -wf ] file... + + +The files mentioned on the command line are validated, and every warning and +every error messages are printed to stderr. + + + +The -wf switch modifies the behaviour such that a well-formedness parser is +simulated. In this mode, the ELEMENT, ATTLIST, and NOTATION declarations of the +DTD are ignored, and only the ENTITY declarations will take effect. This mode +is intended for documents lacking a DTD. Please note that the parser still +scans the DTD fully and will report all errors in the DTD; such checks are not +required by a well-formedness parser. + + + +The pxpvalidate application is the simplest sensible program +using &markup;, you may consider it as "hello world" program. + + + + + + + + + How to parse a document from an application + +Let me first give a rough overview of the object model of the parser. The +following items are represented by objects: + + + + +Documents: The document representation is more or less the +anchor for the application; all accesses to the parsed entities start here. It +is described by the class document contained in the module +Pxp_document. You can get some global information, such +as the XML declaration the document begins with, the DTD of the document, +global processing instructions, and most important, the document tree. + + + + + +The contents of documents: The contents have the structure +of a tree: Elements contain other elements and textElements may +also contain processing instructions. 
Unlike other document models, &markup; +separates processing instructions from the rest of the text and provides a +second interface to access them (method pinstr). However, +there is a parser option (enable_pinstr_nodes) which changes +the behaviour of the parser such that extra nodes for processing instructions +are included into the tree. +Furthermore, the tree does normally not contain nodes for XML comments; +they are ignored by default. Again, there is an option +(enable_comment_nodes) changing this. +. + +The common type to represent both kinds of content is node +which is a class type that unifies the properties of elements and character +data. Every node has a list of children (which is empty if the element is empty +or the node represents text); nodes may have attributes; nodes have always text +contents. There are two implementations of node, the class +element_impl for elements, and the class +data_impl for text data. You find these classes and class +types in the module Pxp_document, too. + + + +Note that attribute lists are represented by non-class values. + + + + + +The node extension: For advanced usage, every node of the +document may have an associated extension which is simply +a second object. This object must have the three methods +clone, node, and +set_node as bare minimum, but you are free to add methods as +you want. This is the preferred way to add functionality to the document +treeDue to the typing system it is more or less impossible to +derive recursive classes in O'Caml. To get around this, it is common practice +to put the modifiable or extensible part of recursive objects into parallel +objects. . The class type extension is +defined in Pxp_document, too. + + + + + +The DTD: Sometimes it is necessary to access the DTD of a +document; the average application does not need this feature. 
The class +dtd describes DTDs, and makes it possible to get +representations of element, entity, and notation declarations as well as +processing instructions contained in the DTD. This class, and +dtd_element, dtd_notation, and +proc_instruction can be found in the module +Pxp_dtd. There are a couple of classes representing +different kinds of entities; these can be found in the module +Pxp_entity. + + + + +Additionally, the following modules play a role: + + + + +Pxp_yacc: Here the main parsing functions such as +parse_document_entity are located. Some additional types and +functions allow the parser to be configured in a non-standard way. + + + + + +Pxp_types: This is a collection of basic types and +exceptions. + + + + +There are some further modules that are needed internally but are not part of +the API. + + + +Let the document to be parsed be stored in a file called +doc.xml. The parsing process is started by calling the +function + + +val parse_document_entity : config -> source -> 'ext spec -> 'ext document + + +defined in the module Pxp_yacc. The first argument +specifies some global properties of the parser; it is recommended to start with +the default_config. The second argument determines where the +document to be parsed comes from; this may be a file, a channel, or an entity +ID. To parse doc.xml, it is sufficient to pass +from_file "doc.xml". + + + +The third argument passes the object specification to use. Roughly +speaking, it determines which classes implement the node objects of which +element types, and which extensions are to be used. The 'ext +polymorphic variable is the type of the extension. For the moment, let us +simply pass default_spec as this argument, and ignore it. + + + +So the following expression parses doc.xml: + + +open Pxp_yacc +let d = parse_document_entity default_config (from_file "doc.xml") default_spec + + +Note that default_config implies that warnings are collected +but not printed. 
Errors raise one of the exceptions defined in +Pxp_types; to get readable errors and warnings catch the +exceptions as follows: + + + + print_endline (Pxp_types.string_of_exn e) +]]> + +Now d is an object of the document +class. If you want the node tree, you can get the root element by + + +let root = d # root + + +and if you would rather like to access the DTD, determine it by + + +let dtd = d # dtd + + +As it is more interesting, let us investigate the node tree now. Given the root +element, it is possible to recursively traverse the whole tree. The children of +a node n are returned by the method +sub_nodes, and the type of a node is returned by +node_type. This function traverses the tree, and prints the +type of each node: + + + + print_endline ("Element of type " ^ name); + let children = n # sub_nodes in + List.iter print_structure children + | T_data -> + print_endline "Data" + | _ -> + (* Other node types are not possible unless the parser is configured + differently. + *) + assert false +]]> + +You can call this function by + + +print_structure root + + +The type returned by node_type is either T_element +name or T_data. The name of the +element type is the string included in the angle brackets. Note that only +elements have children; data nodes are always leaves of the tree. + + + +There are some more methods in order to access a parsed node tree: + + + + +n # parent: Returns the parent node, or raises +Not_found if the node is already the root + + + + +n # root: Returns the root of the node tree. + + + + +n # attribute a: Returns the value of the attribute with +name a. The method returns a value for every +declared attribute, independently of whether the attribute +instance is defined or not. If the attribute is not declared, +Not_found will be raised. (In well-formedness mode, every +attribute is considered as being implicitly declared with type +CDATA.) 
+ + + +The following return values are possible: Value s, +Valuelist sl , and Implied_value. 
+The first two value types indicate that the attribute value is available, +either because there is a definition +a="value" +in the XML text, or because there is a default value (declared in the +DTD). Only if both the instance definition and the default declaration are +missing, the latter value Implied_value will be returned. + + + +In the DTD, every attribute is typed. There are single-value types (CDATA, ID, +IDREF, ENTITY, NMTOKEN, enumerations), in which case the method passes +Value s back, where s is the normalized +string value of the attribute. The other types (IDREFS, ENTITIES, NMTOKENS) +represent list values, and the parser splits the XML literal into several +tokens and returns these tokens as Valuelist sl. + + + +Normalization means that entity references (the +&name; tokens) and +character references +(&#number;) are replaced +by the text they represent, and that white space characters are converted into +plain spaces. + + + + +n # data: Returns the character data contained in the +node. For data nodes, the meaning is obvious as this is the main content of +data nodes. For element nodes, this method returns the concatenated contents of +all inner data nodes. + + +Note that entity references included in the text are resolved while they are +being parsed; for example the text will be returned +as b"]]> by this method. Spaces of data nodes are always +preserved. Newlines are preserved, but always converted to \n characters even +if newlines are encoded as \r\n or \r. Normally you will never see two adjacent +data nodes because the parser collapses all data material at one location into +one node. (However, if you create your own tree or transform the parsed tree, +it is possible to have adjacent data nodes.) + + +Note that elements that do not allow #PCDATA as content +will not have data nodes as children. This means that spaces and newlines, the +only character material allowed for such elements, are silently dropped. 
+ + + + +For example, if the task is to print all contents of elements with type +"valuable" whose attribute "priority" is "1", this function can help: + + + + print_endline "Valuable node with priority 1 found:"; + print_endline (n # data) + | (T_element _ | T_data) -> + let children = n # sub_nodes in + List.iter print_valuable_prio1 children + | _ -> + assert false +]]> + +You can call this function by: + + +print_valuable_prio1 root + + +If you like a DSSSL-like style, you can make the function +process_children explicit: + + + + print_endline "Valuable node with priority 1 found:"; + print_endline (n # data) + | (T_element _ | T_data) -> + process_children n + | _ -> + assert false +]]> + +So far, O'Caml is now a simple "style-sheet language": You can form a big +"match" expression to distinguish between all significant cases, and provide +different reactions on different conditions. But this technique has +limitations; the "match" expression tends to get larger and larger, and it is +difficult to store intermediate values as there is only one big +recursion. Alternatively, it is also possible to represent the various cases as +classes, and to use dynamic method lookup to find the appropriate class. The +next section explains this technique in detail. + + + + + + + + + + Class-based processing of the node tree + +By default, the parsed node tree consists of objects of the same class; this is +a good design as long as you want only to access selected parts of the +document. For complex transformations, it may be better to use different +classes for objects describing different element types. + + + +For example, if the DTD declares the element types a, +b, and c, and if the task is to convert +an arbitrary document into a printable format, the idea is to define for every +element type a separate class that has a method print. 
The +classes are eltype_a, eltype_b, and +eltype_c, and every class implements +print such that elements of the type corresponding to the +class are converted to the output format. + + + +The parser supports such a design directly. As it is impossible to derive +recursive classes in O'CamlThe problem is that the subclass is +usually not a subtype in this case because O'Caml has a contravariant subtyping +rule. , the specialized element classes cannot be formed by +simply inheriting from the built-in classes of the parser and adding methods +for customized functionality. To get around this limitation, every node of the +document tree is represented by two objects, one called +"the node" and containing the recursive definition of the tree, one called "the +extension". Every node object has a reference to the extension, and the +extension has a reference to the node. The advantage of this model is that it +is now possible to customize the extension without affecting the typing +constraints of the recursive node definition. + + + +Every extension must have the three methods clone, +node, and set_node. The method +clone creates a deep copy of the extension object and +returns it; node returns the node object for this extension +object; and set_node is used to tell the extension object +which node is associated with it, this method is automatically called when the +node tree is initialized. The following definition is a good starting point +for these methods; usually clone must be further refined +when instance variables are added to the class: + + +} + method node = + match node with + None -> + assert false + | Some n -> n + method set_node n = + node <- Some n + + end +]]> + + +This part of the extension is usually the same for all classes, so it is a good +idea to consider custom_extension as the super-class of the +further class definitions. 
Continuing the example above, we can define the +element type classes as follows: + + + unit + end + +class eltype_a = + object (self) + inherit custom_extension + method print ch = ... + end + +class eltype_b = + object (self) + inherit custom_extension + method print ch = ... + end + +class eltype_c = + object (self) + inherit custom_extension + method print ch = ... + end +]]> + +The method print can now be implemented for every element +type separately. Note that you get the associated node by invoking + + +self # node + + +and you get the extension object of a node n by writing + + +n # extension + + +It is guaranteed that + + +self # node # extension == self + + +always holds. + + + Here are sample definitions of the print +methods: + +... are only containers: *) + output_string ch "("; + List.iter + (fun n -> n # extension # print ch) + (self # node # sub_nodes); + output_string ch ")"; + end + +class eltype_b = + object (self) + inherit custom_extension + method print ch = + (* Print the value of the CDATA attribute "print": *) + match self # node # attribute "print" with + Value s -> output_string ch s + | Implied_value -> output_string ch "" + | Valuelist l -> assert false + (* not possible because the att is CDATA *) + end + +class eltype_c = + object (self) + inherit custom_extension + method print ch = + (* Print the contents of this element: *) + output_string ch (self # node # data) + end + +class null_extension = + object (self) + inherit custom_extension + method print ch = assert false + end +]]> + + + + +The remaining task is to configure the parser such that these extension classes +are actually used. Here another problem arises: It is not possible to +dynamically select the class of an object to be created. As workaround, +&markup; allows the user to specify exemplar objects for +the various element types; instead of creating the nodes of the tree by +applying the new operator the nodes are produced by +duplicating the exemplars. 
As object duplication preserves the class of the +object, one can create fresh objects of every class for which previously an +exemplar has been registered. + + + +Exemplars are meant as objects without contents, the only interesting thing is +that exemplars are instances of a certain class. The creation of an exemplar +for an element node can be done by: + + +let element_exemplar = new element_impl extension_exemplar + + +And a data node exemplar is created by: + + +let data_exemplar = new data_impl extension_exemplar + + +The classes element_impl and data_impl +are defined in the module Pxp_document. The constructors +initialize the fresh objects as empty objects, i.e. without children, without +data contents, and so on. The extension_exemplar is the +initial extension object the exemplars are associated with. + + + +Once the exemplars are created and stored somewhere (e.g. in a hash table), you +can take an exemplar and create a concrete instance (with contents) by +duplicating it. As user of the parser you are normally not concerned with this +as this is part of the internal logic of the parser, but as background knowledge +it is worthwhile to mention that the two methods +create_element and create_data actually +perform the duplication of the exemplar for which they are invoked, +additionally apply modifications to the clone, and finally return the new +object. Moreover, the extension object is copied, too, and the new node object +is associated with the fresh extension object. Note that this is the reason why +every extension object must have a clone method. + + + +The configuration of the set of exemplars is passed to the +parse_document_entity function as third argument. In our +example, this argument can be set up as follows: + + + + +The ~element_alist function argument defines the mapping +from element types to exemplars as associative list. 
The argument +~data_exemplar specifies the exemplar for data nodes, and +the ~default_element_exemplar is used whenever the parser +finds an element type for which the associative list does not define an +exemplar. + + + +The configuration is now complete. You can still use the same parsing +functions, only the initialization is a bit different. For example, call the +parser by: + + +let d = parse_document_entity default_config (from_file "doc.xml") spec + + +Note that the resulting document d has a usable type; +especially the print method we added is visible. So you can +print your document by + + +d # root # extension # print stdout + + + + +This object-oriented approach looks rather complicated; this is mostly caused +by working around some problems of the strict typing system of O'Caml. Some +auxiliary concepts such as extensions were needed, but the practical +consequences are low. In the next section, one of the examples of the +distribution is explained, a converter from readme +documents to HTML. + + + + + + + + + + Example: An HTML backend for the <emphasis>readme</emphasis> +DTD + + The converter from readme documents to HTML +documents follows strictly the approach to define one class per element +type. The HTML code is similar to the readme source, +because of this most elements can be converted in the following way: Given the +input element + + +content]]> + + +the conversion text is the concatenation of a computed prefix, the recursively +converted content, and a computed suffix. + + + +Only one element type cannot be handled by this scheme: +footnote. Footnotes are collected while they are found in +the input text, and they are printed after the main text has been converted and +printed. + + + + Header + +&readme.code.header; + + + + + Type declarations + +&readme.code.footnote-printer; + + + + + Class <literal>store</literal> + +The store is a container for footnotes. 
You can add a +footnote by invoking alloc_footnote; the argument is an +object of the class footnote_printer, the method returns the +number of the footnote. The interesting property of a footnote is that it can +be converted to HTML, so a footnote_printer is an object +with a method footnote_to_html. The class +footnote which is defined below has a compatible method +footnote_to_html such that objects created from it can be +used as footnote_printers. + + +The other method, print_footnotes prints the footnotes as +definition list, and is typically invoked after the main material of the page +has already been printed. Every item of the list is printed by +footnote_to_html. + + + +&readme.code.store; + + + + + Function <literal>escape_html</literal> + +This function converts the characters <, >, &, and " to their HTML +representation. For example, +escape_html "<>" = "&lt;&gt;". Other +characters are left unchanged. + +&readme.code.escape-html; + + + + + Virtual class <literal>shared</literal> + +This virtual class is the abstract superclass of the extension classes shown +below. It defines the standard methods clone, +node, and set_node, and declares the type +of the virtual method to_html. This method recursively +traverses the whole element tree, and prints the converted HTML code to the +output channel passed as second argument. The first argument is the reference +to the global store object which collects the footnotes. + +&readme.code.shared; + + + + + Class <literal>only_data</literal> + +This class defines to_html such that the character data of +the current node is converted to HTML. Note that self is an +extension object, self # node is the node object, and +self # node # data returns the character data of the node. + +&readme.code.only-data; + + + + + Class <literal>readme</literal> + +This class converts elements of type readme to HTML. Such an +element is (by definition) always the root element of the document. 
First, the +HTML header is printed; the title attribute of the element +determines the title of the HTML page. Some aspects of the HTML page can be +configured by setting certain parameter entities, for example the background +color, the text color, and link colors. After the header, the +body tag, and the headline have been printed, the contents +of the page are converted by invoking to_html on all +children of the current node (which is the root node). Then, the footnotes are +appended to this by telling the global store object to print +the footnotes. Finally, the end tags of the HTML pages are printed. + + + +This class is an example how to access the value of an attribute: The value is +determined by invoking self # node # attribute "title". As +this attribute has been declared as CDATA and as being required, the value has +always the form Value s where s is the +string value of the attribute. + + + +You can also see how entity contents can be accessed. A parameter entity object +can be looked up by self # node # dtd # par_entity "name", +and by invoking replacement_text the value of the entity +is returned after inner parameter and character entities have been +processed. Note that you must use gen_entity instead of +par_entity to access general entities. + + + +&readme.code.readme; + + + + + Classes <literal>section</literal>, <literal>sect1</literal>, +<literal>sect2</literal>, and <literal>sect3</literal> + +As the conversion process is very similar, the conversion classes of the three +section levels are derived from the more general section +class. The HTML code of the section levels only differs in the type of the +headline, and because of this the classes describing the section levels can be +computed by replacing the class argument the_tag of +section by the HTML name of the headline tag. + + + +Section elements are converted to HTML by printing a headline and then +converting the contents of the element recursively. 
More precisely, the first +sub-element is always a title element, and the other +elements are the contents of the section. This structure is declared in the +DTD, and it is guaranteed that the document matches the DTD. Because of this +the title node can be separated from the rest without any checks. + + + +Both the title node, and the body nodes are then converted to HTML by calling +to_html on them. + + + +&readme.code.section; + + + + + Classes <literal>map_tag</literal>, <literal>p</literal>, +<literal>em</literal>, <literal>ul</literal>, <literal>li</literal> + +Several element types are converted to HTML by simply mapping them to +corresponding HTML element types. The class map_tag +implements this, and the class argument the_target_tag +determines the tag name to map to. The output consists of the start tag, the +recursively converted inner elements, and the end tag. + +&readme.code.map-tag; + + + + + Class <literal>br</literal> + +Element of type br are mapped to the same HTML type. Note +that HTML forbids the end tag of br. + +&readme.code.br; + + + + + Class <literal>code</literal> + +The code type is converted to a pre +section (preformatted text). As the meaning of tabs is unspecified in HTML, +tabs are expanded to spaces. + +&readme.code.code; + + + + + Class <literal>a</literal> + +Hyperlinks, expressed by the a element type, are converted +to the HTML a type. If the target of the hyperlink is given +by href, the URL of this attribute can be used +directly. Alternatively, the target can be given by +readmeref in which case the ".html" suffix must be added to +the file name. + + + +Note that within a only #PCDATA is allowed, so the contents +can be converted directly by applying escape_html to the +character data contents. + +&readme.code.a; + + + + + Class <literal>footnote</literal> + +The footnote class has two methods: +to_html to convert the footnote reference to HTML, and +footnote_to_html to convert the footnote text itself. 
+ + + +The footnote reference is converted to a local hyperlink; more precisely, to +two anchor tags which are connected with each other. The text anchor points to +the footnote anchor, and the footnote anchor points to the text anchor. + + + +The footnote must be allocated in the store object. By +allocating the footnote, you get the number of the footnote, and the text of +the footnote is stored until the end of the HTML page is reached when the +footnotes can be printed. The to_html method stores simply +the object itself, such that the footnote_to_html method is +invoked on the same object that encountered the footnote. + + + +The to_html only allocates the footnote, and prints the +reference anchor, but it does not print nor convert the contents of the +note. This is deferred until the footnotes actually get printed, i.e. the +recursive call of to_html on the sub nodes is done by +footnote_to_html. + + + +Note that this technique does not work if you make another footnote within a +footnote; the second footnote gets allocated but not printed. + + + +&readme.code.footnote; + + + + + The specification of the document model + +This code sets up the hash table that connects element types with the exemplars +of the extension classes that convert the elements to HTML. + +&readme.code.tag-map; + + + + + + + + + + + + The objects representing the document + + +This description might be out-of-date. See the module interface files +for updated information. 
+ + + The <literal>document</literal> class + + + + object + method init_xml_version : string -> unit + method init_root : 'ext node -> unit + + method xml_version : string + method xml_standalone : bool + method dtd : dtd + method root : 'ext node + + method encoding : Pxp_types.rep_encoding + + method add_pinstr : proc_instruction -> unit + method pinstr : string -> proc_instruction list + method pinstr_names : string list + + method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit + + end +;; +]]> + + +The methods beginning with init_ are only for internal use +of the parser. + + + + + +xml_version: returns the version string at the beginning of +the document. For example, "1.0" is returned if the document begins with +<?xml version="1.0"?>. + + + +xml_standalone: returns the boolean value of +standalone declaration in the XML declaration. If the +standalone attribute is missing, false is +returned. + + + +dtd: returns a reference to the global DTD object. + + + +root: returns a reference to the root element. + + + +encoding: returns the internal encoding of the +document. This means that all strings of which the document consists are +encoded in this character set. + + + + +pinstr: returns the processing instructions outside the DTD +and outside the root element. The argument passed to the method names a +target, and the method returns all instructions with this +target. The target is the first word inside <? and +?>. + + + +pinstr_names: returns the names of the processing instructions + + + +add_pinstr: adds another processing instruction. This method +is used by the parser itself to enter the instructions returned by +pinstr, but you can also enter additional instructions. + + + + +write: writes the document to the passed stream as XML +text using the passed (external) encoding. The generated text is always valid +XML and can be parsed by PXP; however, the text is badly formatted (this is not +a pretty printer). 
+ + + + + + + + The class type <literal>node</literal> + + +From Pxp_document: + + +type node_type = + T_data +| T_element of string +| T_super_root +| T_pinstr of string +| T_comment +and some other, reserved types +;; + +class type [ 'ext ] node = + object ('self) + constraint 'ext = 'ext node #extension + + (* *) + + method extension : 'ext + method dtd : dtd + method parent : 'ext node + method root : 'ext node + method sub_nodes : 'ext node list + method iter_nodes : ('ext node &fun; unit) &fun; unit + method iter_nodes_sibl : + ('ext node option &fun; 'ext node &fun; 'ext node option &fun; unit) &fun; unit + method node_type : node_type + method encoding : Pxp_types.rep_encoding + method data : string + method position : (string * int * int) + method comment : string option + method pinstr : string &fun; proc_instruction list + method pinstr_names : string list + method write : Pxp_types.output_stream -> Pxp_types.encoding -> unit + + (* *) + + method attribute : string &fun; Pxp_types.att_value + method required_string_attribute : string &fun; string + method optional_string_attribute : string &fun; string option + method required_list_attribute : string &fun; string list + method optional_list_attribute : string &fun; string list + method attribute_names : string list + method attribute_type : string &fun; Pxp_types.att_type + method attributes : (string * Pxp_types.att_value) list + method id_attribute_name : string + method id_attribute_value : string + method idref_attribute_names : string + + (* *) + + method add_node : ?force:bool &fun; 'ext node &fun; unit + method add_pinstr : proc_instruction &fun; unit + method delete : unit + method set_nodes : 'ext node list &fun; unit + method quick_set_attributes : (string * Pxp_types.att_value) list &fun; unit + method set_comment : string option &fun; unit + + (* *) + + method orphaned_clone : 'self + method orphaned_flat_clone : 'self + method create_element : + ?position:(string * int * int) &fun; + dtd 
&fun; node_type &fun; (string * string) list &fun; + 'ext node + method create_data : dtd &fun; string &fun; 'ext node + method keep_always_whitespace_mode : unit + + (* *) + + method local_validate : ?use_dfa:bool -> unit -> unit + + (* ... Internal methods are undocumented. *) + + end +;; + + +In the module Pxp_types you can find another type +definition that is important in this context: + + +type Pxp_types.att_value = + Value of string + | Valuelist of string list + | Implied_value +;; + + + + + The structure of document trees + + +A node represents either an element or a character data section. There are two +classes implementing the two aspects of nodes: element_impl +and data_impl. The latter class does not implement all +methods because some methods do not make sense for data nodes. + + + +(Note: PXP also supports a mode which forces that processing instructions and +comments are represented as nodes of the document tree. However, these nodes +are instances of element_impl with node types +T_pinstr and T_comment, +respectively. This mode must be explicitly configured; the basic representation +knows only element and data nodes.) + + + The following figure +() shows an example how +a tree is constructed from element and data nodes. The circular areas +represent element nodes whereas the ovals denote data nodes. Only elements +may have subnodes; data nodes are always leaves of the tree. The subnodes +of an element can be either element or data nodes; in both cases the O'Caml +objects storing the nodes have the class type node. + + Attributes (the clouds in the picture) are not directly +integrated into the tree; there is always an extra link to the attribute +list. This is also true for processing instructions (not shown in the +picture). This means that there are separated access methods for attributes and +processing instructions. + +
+A tree with element nodes, data nodes, and attributes + +
+ + Only elements, data sections, attributes and processing +instructions (and comments, if configured) can, directly or indirectly, occur +in the document tree. It is impossible to add entity references to the tree; if +the parser finds such a reference, not the reference as such but the referenced +text (i.e. the tree representing the structured text) is included in the +tree. + + Note that the parser collapses as much data material into one +data node as possible such that there are normally never two adjacent data +nodes. This invariant is enforced even if data material is included by entity +references or CDATA sections, or if a data sequence is interrupted by +comments. So a &amp; b <!-- comment --> c <![CDATA[ +<> d]]> is represented by only one data node, for +instance. However, you can create document trees manually which break this +invariant; it is only the way the parser forms the tree. + + +
+Nodes are doubly linked trees + +
+ + +The node tree has links in both directions: Every node has a link to its parent +(if any), and it has links to the subnodes (see +figure ). Obviously, +this doubly-linked structure simplifies the navigation in the tree; but has +also some consequences for the possible operations on trees. + + +Because every node must have at most one parent node, +operations are illegal if they violate this condition. The following figure +() shows on the left side +that node y is added to x as new subnode +which is allowed because y does not have a parent yet. The +right side of the picture illustrates what would happen if y +had a parent node; this is illegal because y would have two +parents after the operation. + +
+A node can only be added if it is a root + + +
+ + +The "delete" operation simply removes the links between two nodes. In the +picture () the node +x is deleted from the list of subnodes of +y. After that, x becomes the root of the +subtree starting at this node. + +
+A deleted node becomes the root of the subtree + +
+ + +It is also possible to make a clone of a subtree; illustrated in +. In this case, the +clone is a copy of the original subtree except that it is no longer a +subnode. Because cloning never keeps the connection to the parent, the clones +are called orphaned. + + +
+The clone of a subtree + +
+
+ + + The methods of the class type <literal>node</literal> + + + + + <link linkend="type-node-general.sig">General observers</link> + + + + + + +extension: The reference to the extension object which +belongs to this node (see ...). + + + +dtd: Returns a reference to the global DTD. All nodes +of a tree must share the same DTD. + + + + +parent: Get the father node. Raises +Not_found in the case the node does not have a +parent, i.e. the node is the root. + + + +root: Gets the reference to the root node of the tree. +Every node is contained in a tree with a root, so this method always +succeeds. Note that this method searches the root, +which costs time proportional to the length of the path to the root. + + + + +sub_nodes: Returns references to the children. The returned +list reflects the order of the children. For data nodes, this method returns +the empty list. + + + + +iter_nodes f: Iterates over the children, and calls +f for every child in turn. + + + + +iter_nodes_sibl f: Iterates over the children, and calls +f for every child in turn. f gets as +arguments the previous node, the current node, and the next node. + + + +node_type: Returns either T_data which +means that the node is a data node, or T_element n +which means that the node is an element of type n. +If configured, possible node types are also T_pinstr t +indicating that the node represents a processing instruction with target +t, and T_comment in which case the node +is a comment. + + + + +encoding: Returns the encoding of the strings. + + + +data: Returns the character data of this node and all +children, concatenated as one string. The encoding of the string is what +the method encoding returns. +- For data nodes, this method simply returns the represented characters. +For elements, the meaning of the method has been extended such that it +returns something useful, i.e. the effectively contained characters, without +markup. 
(For T_pinstr and T_comment +nodes, the method returns the empty string.) + + + + +position: If configured, this method returns the position of +the element as triple (entity, line, byteposition). For data nodes, the +position is not stored. If the position is not available the triple +"?", 0, 0 is returned. + + + + +comment: Returns Some text for comment +nodes, and None for other nodes. The text +is everything between the comment delimiters <!-- and +-->. + + + + +pinstr n: Returns all processing instructions that are +directly contained in this element and that have a target +specification of n. The target is the first word after +the <?. + + + + +pinstr_names: Returns the list of all targets of processing +instructions directly contained in this element. + + + +write s enc: Prints the node and all subnodes to the passed +output stream as valid XML text, using the passed external encoding. + + + + + + + + + + <link linkend="type-node-atts.sig">Attribute observers</link> + + + + + +attribute n: Returns the value of the attribute with name +n. This method returns a value for every declared +attribute, and it raises Not_found for any undeclared +attribute. Note that it even returns a value if the attribute is actually +missing but is declared as #IMPLIED or has a default +value. - Possible values are: + + + +Implied_value: The attribute has been declared with the +keyword #IMPLIED, and the attribute is missing in the +attribute list of this element. + + + +Value s: The attribute has been declared as type +CDATA, as ID, as +IDREF, as ENTITY, or as +NMTOKEN, or as enumeration or notation, and one of the two +conditions holds: (1) The attribute value is present in the attribute list in +which case the value is returned in the string s. (2) The +attribute has been omitted, and the DTD declared the attribute with a default +value. The default value is returned in s. +- Summarized, Value s is returned for non-implied, non-list +attribute values. 
+ + + + +Valuelist l: The attribute has been declared as type +IDREFS, as ENTITIES, or +as NMTOKENS, and one of the two conditions holds: (1) The +attribute value is present in the attribute list in which case the +space-separated tokens of the value are returned in the string list +l. (2) The attribute has been omitted, and the DTD declared +the attribute with a default value. The default value is returned in +l. +- Summarized, Valuelist l is returned for all list-type +attribute values. + + + + +Note that before the attribute value is returned, the value is normalized. This +means that newlines are converted to spaces, and that references to character +entities (i.e. &#n;) and +general entities +(i.e. &name;) are expanded; +if necessary, expansion is performed recursively. + + + +In well-formedness mode, there is no DTD which could declare an +attribute. Because of this, every occurring attribute is considered as a CDATA +attribute. + + + + +required_string_attribute n: returns the Value attribute +called n, or the Valuelist attribute as a string where the list elements +are separated by spaces. If the attribute value is implied, or if the +attribute does not exist, the method will fail. - This method is convenient +if you expect a non-implied and non-list attribute value. + + + + +optional_string_attribute n: returns the Value attribute +called n, or the Valuelist attribute as a string where the list elements +are separated by spaces. If the attribute value is implied, or if the +attribute does not exist, the method returns None. - This method is +convenient if you expect a non-list attribute value including the implied +value. + + + + +required_list_attribute n: returns the Valuelist attribute +called n, or the Value attribute as a list with a single element. +If the attribute value is implied, or if the +attribute does not exist, the method will fail. - This method is +convenient if you expect a list attribute value. 
+ + + + +optional_list_attribute n: returns the Valuelist attribute +called n, or the Value attribute as a list with a single element. +If the attribute value is implied, or if the +attribute does not exist, an empty list will be returned. - This method +is convenient if you expect a list attribute value or the implied value. + + + + +attribute_names: returns the list of all attribute names of +this element. As this is a validating parser, this list is equal to the +list of declared attributes. + + + + +attribute_type n: returns the type of the attribute called +n. See the module Pxp_types for a +description of the encoding of the types. + + + + +attributes: returns the list of pairs of names and values +for all attributes of +this element. + + + +id_attribute_name: returns the name of the attribute that is +declared with type ID. There is at most one such attribute. The method raises +Not_found if there is no declared ID attribute for the +element type. + + + +id_attribute_value: returns the value of the attribute that +is declared with type ID. There is at most one such attribute. The method raises +Not_found if there is no declared ID attribute for the +element type. + + + +idref_attribute_names: returns the list of attribute names +that are declared as IDREF or IDREFS. + + + + + + + + + <link linkend="type-node-mods.sig">Modifying methods</link> + + + +The following methods are only defined for element nodes (more exactly: +the methods are defined for data nodes, too, but fail always). + + + + +add_node sn: Adds sub node sn to the list +of children. This operation is illustrated in the picture +. This method expects that +sn is a root, and it requires that sn and +the current object share the same DTD. 
+ + +Because add_node is the method the parser itself uses +to add new nodes to the tree, it performs by default some simple validation +checks: If the content model is a regular expression, it is not allowed to add +data nodes to this node unless the new nodes consist only of whitespace. In +this case, the new data nodes are silently dropped (you can change this by +invoking keep_always_whitespace_mode). + + +If the document is flagged as stand-alone, these data nodes only +containing whitespace are even forbidden if the element declaration is +contained in an external entity. This case is detected and rejected. + +If the content model is EMPTY, it is not allowed to +add any data node unless the data node is empty. In this case, the new data +node is silently dropped. + + +These checks only apply if there is a DTD. In well-formedness mode, it is +assumed that every element is declared with content model +ANY which prohibits any validation check. Furthermore, you +turn these checks off by passing ~force:true as first +argument. + + + +add_pinstr pi: Adds the processing instruction +pi to the list of processing instructions. + + + + + +delete: Deletes this node from the tree. After this +operation, this node is no longer the child of the former father node; and the +node loses the connection to the father as well. This operation is illustrated +by the figure . + + + + +set_nodes nl: Sets the list of children to +nl. It is required that every member of nl +is a root, and that all members and the current object share the same DTD. +Unlike add_node, no validation checks are performed. + + + + +quick_set_attributes atts: sets the attributes of this +element to atts. It is not checked +whether atts matches the DTD or not; it is up to the +caller of this method to ensure this. (This method may be useful to transform +the attribute values, i.e. apply a mapping to every attribute.) 
+ + + + +set_comment text: This method is only applicable to +T_comment nodes; it sets the comment text contained by such +nodes. + + + + + + + + + <link linkend="type-node-cloning.sig">Cloning methods</link> + + + + + + +orphaned_clone: Returns a clone of the node and the complete +tree below this node (deep clone). The clone does not have a parent (i.e. the +reference to the parent node is not cloned). While +copying the subtree, strings are skipped; it is likely that the original tree +and the copy tree share strings. Extension objects are cloned by invoking +the clone method on the original objects; how much of +the extension objects is cloned depends on the implementation of this method. + + This operation is illustrated by the figure +. + + + + +orphaned_flat_clone: Returns a clone of the node, +but sets the list of sub nodes to [], i.e. the sub nodes are not cloned. + + + + + +create_element dtd nt al: Returns a flat copy of this node +(which must be an element) with the following modifications: The DTD is set to +dtd; the node type is set to nt, and the +new attribute list is set to al (given as list of +(name,value) pairs). The copy does not have children nor a parent. It does not +contain processing instructions. See +the example below. + + + Note that you can specify the position of the new node +by the optional argument ~position. + + + + +create_data dtd cdata: Returns a flat copy of this node +(which must be a data node) with the following modifications: The DTD is set to +dtd; the node type is set to T_data; the +attribute list is empty (data nodes never have attributes); the list of +children and PIs is empty, too (same reason). The new node does not have a +parent. The value cdata is the new character content of the +node. See +the example below. + + + + +keep_always_whitespace_mode: Even data nodes which are +normally dropped because they only contain ignorable whitespace, can be added to +this node once this mode is turned on. 
(This mode is useful to produce +canonical XML.) + + + + + + + + + + <link linkend="type-node-weird.sig">Validating methods</link> + + +There is one method which locally validates the node, i.e. checks whether the +subnodes match the content model of this node. + + + + +local_validate: Checks that this node conforms to the +DTD by comparing the type of the subnodes with the content model for this +node. (Applications need not call this method unless they add new nodes +themselves to the tree.) + + + + + + + + + The class <literal>element_impl</literal> + +This class is an implementation of node which +realizes element nodes: + + + [ 'ext ] node +]]> + + + + + Constructor + +You can create a new instance by + + +new element_impl extension_object + + +which creates a special form of empty element which already contains a +reference to the extension_object, but is +otherwise empty. This special form is called an +exemplar. The purpose of exemplars is that they serve as +patterns that can be duplicated and filled with data. The method + +create_element is designed to perform this action. + + + + + + Example + + First, create an exemplar by + + +let exemplar_ext = ... in +let exemplar = new element_impl exemplar_ext in + + +The exemplar is not used in node trees, but only as +a pattern when the element nodes are created: + + +let element = exemplar # create_element dtd (T_element name) attlist + + +The element is a copy of exemplar +(even the extension exemplar_ext has been copied) +which ensures that element and its extension are objects +of the same class as the exemplars; note that you need not to pass a +class name or other meta information. The copy is initially connected +with the dtd, it gets a node type, and the attribute list +is filled. The element is now fully functional; it can +be added to another element as child, and it can contain references to +subnodes. 
+ + + + + + + The class <literal>data_impl</literal> + +This class is an implementation of node which +should be used for all character data nodes: + + + [ 'ext ] node +]]> + + + + + + Constructor + +You can create a new instance by + + +new data_impl extension_object + + +which creates an empty exemplar node which is connected to +extension_object. The node does not contain a +reference to any DTD, and because of this it cannot be added to node trees. + + + + To get a fully working data node, apply the method +create_data + to the exemplar (see example). + + + + + Example + + First, create an exemplar by + + +let exemplar_ext = ... in +let exemplar = new data_impl exemplar_ext in + + +The exemplar is not used in node trees, but only as +a pattern when the data nodes are created: + + +let data_node = exemplar # create_data dtd "The characters contained in the data node" + + +The data_node is a copy of exemplar. +The copy is initially connected +with the dtd, and it is filled with character material. +The data_node is now fully functional; it can +be added to an element as child. + + + + + + The type <literal>spec</literal> + +The type spec defines a way to handle the details of +creating nodes from exemplars. + + + ?comment_exemplar : 'ext node -> + ?default_pinstr_exemplar : 'ext node -> + ?pinstr_mapping : (string, 'ext node) Hashtbl.t -> + data_exemplar: 'ext node -> + default_element_exemplar: 'ext node -> + element_mapping: (string, 'ext node) Hashtbl.t -> + unit -> + 'ext spec + +val make_spec_from_alist : + ?super_root_exemplar : 'ext node -> + ?comment_exemplar : 'ext node -> + ?default_pinstr_exemplar : 'ext node -> + ?pinstr_alist : (string * 'ext node) list -> + data_exemplar: 'ext node -> + default_element_exemplar: 'ext node -> + element_alist: (string * 'ext node) list -> + unit -> + 'ext spec +]]> + +The two functions make_spec_from_mapping and +make_spec_from_alist create spec +values. 
Both functions are functionally equivalent and the only difference is +that the first function prefers hashtables and the latter associative lists to +describe mappings from names to exemplars. + + + +You can specify exemplars for the various kinds of nodes that need to be +generated when an XML document is parsed: + + + + ~super_root_exemplar: This exemplar +is used to create the super root. This special node is only created if the +corresponding configuration option has been selected; it is the parent node of +the root node which may be convenient if every working node must have a parent. + + + ~comment_exemplar: This exemplar is +used when a comment node must be created. Note that such nodes are only created +if the corresponding configuration option is "on". + + + + ~default_pinstr_exemplar: If a node +for a processing instruction must be created, and the instruction is not listed +in the table passed by ~pinstr_mapping or +~pinstr_alist, this exemplar is used. +Again the configuration option must be "on" in order to create such nodes at +all. + + + + ~pinstr_mapping or +~pinstr_alist: Map the target names of processing +instructions to exemplars. These mappings are only used when nodes for +processing instructions are created. + + + ~data_exemplar: The exemplar for +ordinary data nodes. + + + ~default_element_exemplar: This +exemplar is used if an element node must be created, but the element type +cannot be found in the tables element_mapping or +element_alist. + + + ~element_mapping or +~element_alist: Map the element types to exemplars. These +mappings are used to create element nodes. + + + +In most cases, you only want to create spec values to pass +them to the parser functions found in Pxp_yacc. However, it +might be useful to apply spec values directly. + + +The following functions create various types of nodes by selecting the +corresponding exemplar from the passed spec value, and by +calling create_element or create_data on +the exemplar. 
+ + + dtd -> + (* data material: *) string -> + 'ext node + +val create_element_node : + ?position:(string * int * int) -> + 'ext spec -> + dtd -> + (* element type: *) string -> + (* attributes: *) (string * string) list -> + 'ext node + +val create_super_root_node : + ?position:(string * int * int) -> + 'ext spec -> + dtd -> + 'ext node + +val create_comment_node : + ?position:(string * int * int) -> + 'ext spec -> + dtd -> + (* comment text: *) string -> + 'ext node + +val create_pinstr_node : + ?position:(string * int * int) -> + 'ext spec -> + dtd -> + proc_instruction -> + 'ext node +]]> + + + + + Examples + + + Building trees. + + Here is the piece of code that creates the tree of +the figure . The extension +object and the DTD are beyond the scope of this example. + + +let exemplar_ext = ... (* some extension *) in +let dtd = ... (* some DTD *) in + +let element_exemplar = new element_impl exemplar_ext in +let data_exemplar = new data_impl exemplar_ext in + +let a1 = element_exemplar # create_element dtd (T_element "a") ["att", "apple"] +and b1 = element_exemplar # create_element dtd (T_element "b") [] +and c1 = element_exemplar # create_element dtd (T_element "c") [] +and a2 = element_exemplar # create_element dtd (T_element "a") ["att", "orange"] +in + +let cherries = data_exemplar # create_data dtd "Cherries" in +let orange = data_exemplar # create_data dtd "An orange" in + +a1 # add_node b1; +a1 # add_node c1; +b1 # add_node a2; +b1 # add_node cherries; +a2 # add_node orange; + + +Alternatively, the last block of statements could also be written as: + + +a1 # set_nodes [b1; c1]; +b1 # set_nodes [a2; cherries]; +a2 # set_nodes [orange]; + + +The root of the tree is a1, i.e. it is true that + + +x # root == a1 + + +for every x from { a1, a2, +b1, c1, cherries, +orange }. 
+ + + +Furthermore, the following properties hold: + + + a1 # attribute "att" = Value "apple" +& a2 # attribute "att" = Value "orange" + +& cherries # data = "Cherries" +& orange # data = "An orange" +& a1 # data = "An orangeCherries" + +& a1 # node_type = T_element "a" +& a2 # node_type = T_element "a" +& b1 # node_type = T_element "b" +& c1 # node_type = T_element "c" +& cherries # node_type = T_data +& orange # node_type = T_data + +& a1 # sub_nodes = [ b1; c1 ] +& a2 # sub_nodes = [ orange ] +& b1 # sub_nodes = [ a2; cherries ] +& c1 # sub_nodes = [] +& cherries # sub_nodes = [] +& orange # sub_nodes = [] + +& a2 # parent == b1 +& b1 # parent == a1 +& c1 # parent == a1 +& cherries # parent == b1 +& orange # parent == a2 + + + + Searching nodes. + + The following function searches all nodes of a tree +for which a certain condition holds: + + +let rec search p t = + if p t then + t :: search_list p (t # sub_nodes) + else + search_list p (t # sub_nodes) + +and search_list p l = + match l with + [] -> [] + | t :: l' -> (search p t) @ (search_list p l') +;; + + + + + For example, if you want to search all elements of a certain +type et, the function search can be +applied as follows: + + +let search_element_type et t = + search (fun x -> x # node_type = T_element et) t +;; + + + + + Getting attribute values. + + Suppose we have the declaration: + +]]> + + +In this case, every element e must have an attribute +a, otherwise the parser would indicate an error. If +the O'Caml variable n holds the node of the tree +corresponding to the element, you can get the value of the attribute +a by + + +let value_of_a = n # required_string_attribute "a" + + +which is more or less an abbreviation for + + s + | _ -> assert false]]> + + +- as the attribute is required, the attribute method always +returns a Value. + + + + In contrast to this, the attribute b can be +omitted. 
In this case, the method required_string_attribute +works only if the attribute is there, and the method will fail if the attribute +is missing. To get the value, you can apply the method +optional_string_attribute: + + +let value_of_b = n # optional_string_attribute "b" + + +Now, value_of_b is of type string option, +and None represents the omitted attribute. Alternatively, +you could also use attribute: + + Some s + | Implied_value -> None + | _ -> assert false]]> + + + + The attribute c behaves much like +a, because it has always a value. If the attribute is +omitted, the default, here "12345", will be returned instead. Because of this, +you can again use required_string_attribute to get the +value. + + + The type CDATA is the most general string +type. The types NMTOKEN, ID, +IDREF, ENTITY, and all enumerators and +notations are special forms of string types that restrict the possible +values. From O'Caml, they behave like CDATA, i.e. you can +use the methods required_string_attribute and +optional_string_attribute, too. + + + In contrast to this, the types NMTOKENS, +IDREFS, and ENTITIES mean lists of +strings. Suppose we have the declaration: + +]]> + + +The type NMTOKENS stands for lists of space-separated +tokens; for example the value "1 abc 23ef" means the list +["1"; "abc"; "23ef"]. (Again, IDREFS +and ENTITIES have more restricted values.) To get the +value of attribute d, one can use + + +let value_of_d = n # required_list_attribute "d" + + +or + + l + | _ -> assert false]]> + + +As d is required, the attribute cannot be omitted, and +the attribute method returns always a +Valuelist. + + + For optional attributes like e, apply + + +let value_of_e = n # optional_list_attribute "e" + + +or + + l + | Implied_value -> [] + | _ -> assert false]]> + + +Here, the case that the attribute is missing counts like the empty list. + + + + + + + Iterators + + There are also several iterators in Pxp_document; please see +the mli file for details. 
You can find examples for them in the +"simple_transformation" directory. + + + f:('ext node -> bool) -> 'ext node -> 'ext node + +val find_all : ?deeply:bool -> + f:('ext node -> bool) -> 'ext node -> 'ext node list + +val find_element : ?deeply:bool -> + string -> 'ext node -> 'ext node + +val find_all_elements : ?deeply:bool -> + string -> 'ext node -> 'ext node list + +exception Skip +val map_tree : pre:('exta node -> 'extb node) -> + ?post:('extb node -> 'extb node) -> + 'exta node -> + 'extb node + + +val map_tree_sibl : + pre: ('exta node option -> 'exta node -> 'exta node option -> + 'extb node) -> + ?post:('extb node option -> 'extb node -> 'extb node option -> + 'extb node) -> + 'exta node -> + 'extb node + +val iter_tree : ?pre:('ext node -> unit) -> + ?post:('ext node -> unit) -> + 'ext node -> + unit + +val iter_tree_sibl : + ?pre: ('ext node option -> 'ext node -> 'ext node option -> unit) -> + ?post:('ext node option -> 'ext node -> 'ext node option -> unit) -> + 'ext node -> + unit +]]> + + + +
+ + + + + The class type <literal>extension</literal> + + + + unit + (* "set_node" is invoked once the extension is associated to a new + * node object. + *) + end +]]> + + +This is the type of classes used for node extensions. For every node of the +document tree, there is not only the node object, but also +an extension object. The latter has minimal +functionality; it has only the necessary methods to be attached to the node +object containing the details of the node instance. The extension object is +called extension because its purpose is extensibility. + + For some reasons, it is impossible to derive the +node classes (i.e. element_impl and +data_impl) such that the subclasses can be extended by +new methods. But +subclassing nodes is a great feature, because it allows the user to provide +different classes for different types of nodes. The extension objects are a +workaround that is as powerful as direct subclassing, the costs are +some notation overhead. + + +
+The structure of nodes and extensions + + +
+ + The picture shows how the nodes and extensions are linked +together. Every node has a reference to its extension, and every extension has +a reference to its node. The methods extension and +node follow these references; a typical phrase is + + +self # node # attribute "xy" + + +to get the value of an attribute from a method defined in the extension object; +or + + +self # node # iter_nodes + (fun n -> n # extension # my_method ...) + + +to iterate over the subnodes and to call my_method of the +corresponding extension objects. + + + Note that extension objects do not have references to subnodes +(or "subextensions") themselves; in order to get one of the children of an +extension you must first go to the node object, then get the child node, and +finally reach the extension that is logically the child of the extension you +started with. + + + How to define an extension class + + At minimum, you must define the methods +clone, node, and +set_node such that your class is compatible with the type +extension. The method set_node is called +during the initialization of the node, or after a node has been cloned; the +node object invokes set_node on the extension object to tell +it that this node is now the object the extension is linked to. The extension +must return the node object passed as argument of set_node +when the node method is called. + + The clone method must return a copy of the +extension object; at least the object itself must be duplicated, but if +required, the copy should deeply duplicate all objects and values that are +referred by the extension, too. Whether this is required, depends on the +application; clone is invoked by the node object when one of +its cloning methods is called. + + A good starting point for an extension class: + + +} + + method node = + match node with + None -> + assert false + | Some n -> n + + method set_node n = + node <- Some n + + end +]]> + + +This class is compatible with extension. 
The purpose of +defining such a class is, of course, adding further methods; and you can do it +without restriction. + + + Often, you want not only one extension class. In this case, +it is the simplest way that all your classes (for one kind of document) have +the same type (with respect to the interface; i.e. it does not matter if your +classes differ in the defined private methods and instance variables, but +public methods count). This approach avoids lots of coercions and problems with +type incompatibilities. It is simple to implement: + + + + + +If a class does not need a method (e.g. because it does not make sense, or it +would violate some important condition), it is possible to define the method +and to always raise an exception when the method is invoked +(e.g. assert false). + + + The latter is a strong recommendation: do not try to further +specialize the types of extension objects. It is difficult, sometimes even +impossible, and almost never worth-while. + + + + How to bind extension classes to element types + + Once you have defined your extension classes, you can bind them +to element types. The simplest case is that you have only one class and that +this class is to be always used. The parsing functions in the module +Pxp_yacc take a spec argument which +can be customized. If your single class has the name c, +this argument should be + + +let spec = + make_spec_from_alist + ~data_exemplar: (new data_impl c) + ~default_element_exemplar: (new element_impl c) + ~element_alist: [] + () + + +This means that data nodes will be created from the exemplar passed by +~data_exemplar and that all element nodes will be made from the exemplar +specified by ~default_element_exemplar. In ~element_alist, you can +pass that different exemplars are to be used for different element types; but +this is an optional feature. If you do not need it, pass the empty list. 
+ + + +Remember that an exemplar is a (node, extension) pair that serves as pattern +when new nodes (and the corresponding extension objects) are added to the +document tree. In this case, the exemplar contains c as +extension, and when nodes are created, the exemplar is cloned, and cloning +makes also a copy of c such that all nodes of the document +tree will have a copy of c as extension. + + + The ~element_alist argument can bind +specific element types to specific exemplars; as exemplars may be instances of +different classes it is effectively possible to bind element types to +classes. For example, if the element type "p" is implemented by class "c_p", +and "q" is realized by "c_q", you can pass the following value: + + +let spec = + make_spec_from_alist + ~data_exemplar: (new data_impl c) + ~default_element_exemplar: (new element_impl c) + ~element_alist: + [ "p", new element_impl c_p; + "q", new element_impl c_q; + ] + () + + +The extension object c is still used for all data nodes and +for all other element types. + + + + +
+ + + + + Details of the mapping from XML text to the tree representation + + + + The representation of character-free elements + + If an element declaration does not allow the element to +contain character data, the following rules apply. + + If the element must be empty, i.e. it is declared with the +keyword EMPTY, the element instance must be effectively +empty (it must not even contain whitespace characters). The parser guarantees +that a declared EMPTY element does never contain a data +node, even if the data node represents the empty string. + + If the element declaration only permits other elements to occur +within that element but not character data, it is still possible to insert +whitespace characters between the subelements. The parser ignores these +characters, too, and does not create data nodes for them. + + + Example. + + Consider the following element types: + + + + +]]> + +Only x may contain character data, the keyword +#PCDATA indicates this. The other types are character-free. + + + + The XML term + + +]]> + +will be internally represented by an element node for x +with three subnodes: the first z element, a data node +containing the space character, and the second z element. +In contrast to this, the term + + +]]> + +is represented by an element node for y with only +two subnodes, the two z elements. There +is no data node for the space character because spaces are ignored in the +character-free element y. + + + + + + The representation of character data + + The XML specification allows all Unicode characters in XML +texts. This parser can be configured such that UTF-8 is used to represent the +characters internally; however, the default character encoding is +ISO-8859-1. (Currently, no other encodings are possible for the internal string +representation; the type Pxp_types.rep_encoding enumerates +the possible encodings. 
In principle, the parser could use any encoding that is +ASCII-compatible, but there are currently only lexical analyzers for UTF-8 and +ISO-8859-1. It is currently impossible to use UTF-16 or UCS-4 as internal +encodings (or other multibyte encodings which are not ASCII-compatible) unless +major parts of the parser are rewritten - unlikely...) + + + +The internal encoding may be different from the external encoding (specified +in the XML declaration <?xml ... encoding="..."?>); in +this case the strings are automatically converted to the internal encoding. + + + +If the internal encoding is ISO-8859-1, it is possible that there are +characters that cannot be represented. In this case, the parser ignores such +characters and prints a warning (to the collect_warning +object that must be passed when the parser is called). + + + The XML specification allows lines to be separated by single LF +characters, by CR LF character sequences, or by single CR +characters. Internally, these separators are always converted to single LF +characters. + + The parser guarantees that there are never two adjacent data +nodes; if necessary, data material that would otherwise be represented by +several nodes is collapsed into one node. Note that you can still create node +trees with adjacent data nodes; however, the parser does not return such trees. + + + Note that CDATA sections are not represented specially; such +sections are added to the current data material that is being collected for the +next data node. + + + + + The representation of entities within documents + + Entities are not represented within +documents! If the parser finds an entity reference in the document +content, the reference is immediately expanded, and the parser reads the +expansion text instead of the reference. + + + + + The representation of attributes As attribute +values are composed of Unicode characters, too, the same problems with the +character encoding arise as for character material.
Attribute values are +converted to the internal encoding, too; and if there are characters that +cannot be represented, these are dropped, and a warning is printed. + + Attribute values are normalized before they are returned by +methods like attribute. First, any remaining entity +references are expanded; if necessary, expansion is performed recursively. +Second, newline characters (any of LF, CR LF, or CR characters) are converted +to single space characters. Note that especially the latter action is +prescribed by the XML standard (but is not converted +such that it is still possible to include line feeds into attributes). + + + + + The representation of processing instructions +Processing instructions are parsed to some extent: The first word of the +PI is called the target, and it is stored separately from the rest of the PI: + + +]]> + +The exact location where a PI occurs is not represented (by default). The +parser puts the PI into the object that represents the embracing construct (an +element, a DTD, or the whole document); that means you can find out which PIs +occur in a certain element, in the DTD, or in the whole document, but you +cannot look up the exact position within the construct. + + + If you require the exact location of PIs, it is possible to +create extra nodes for them. This mode is controlled by the option +enable_pinstr_nodes. The additional nodes have the node type +T_pinstr target, and are created +from special exemplars contained in the spec (see +pxp_document.mli). + + + + The representation of comments + +Normally, comments are not represented; they are dropped by +default. However, if you require them, it is possible to create +T_comment nodes for them. This mode can be specified by the +option enable_comment_nodes. Comment nodes are created from +special exemplars contained in the spec (see +pxp_document.mli). You can access the contents of comments through the +method comment.
+ + + + The attributes <literal>xml:lang</literal> and +<literal>xml:space</literal> + + These attributes are not supported specially; they are handled +like any other attribute. + + + + + And what about namespaces? + Currently, there is no special support for namespaces. +However, the parser allows it that the colon occurs in names such that it is +possible to implement namespaces on top of the current API. + + Some future release of PXP will support namespaces as built-in +feature... + + + + +
+ + + + + Configuring and calling the parser + + + + + + + Overview + +There are the following main functions invoking the parser (in Pxp_yacc): + + + + parse_document_entity: You want to +parse a complete and closed document consisting of a DTD and the document body; +the body is validated against the DTD. This mode is interesting if you have a +file + + ... +]]> + +and you can accept any DTD that is included in the file (e.g. because the file +is under your control). + + + + parse_wfdocument_entity: You want to +parse a complete and closed document consisting of a DTD and the document body; +but the body is not validated, only checked for well-formedness. This mode is +preferred if validation costs too much time or if the DTD is missing. + + + + parse_dtd_entity: You want only to +parse an entity (file) containing the external subset of a DTD. Sometimes it is +interesting to read such a DTD, for example to compare it with the DTD included +in a document, or to apply the next mode: + + + + parse_content_entity: You want only to +parse an entity (file) containing a fragment of a document body; this fragment +is validated against the DTD you pass to the function. Especially, the fragment +must not have a <!DOCTYPE> clause, and must directly +begin with an element. The element is validated against the DTD. This mode is +interesting if you want to check documents against a fixed, immutable DTD. + + + + parse_wfcontent_entity: This function +also parses a single element without DTD, but does not validate it. + + + extract_dtd_from_document_entity: This +function extracts the DTD from a closed document consisting of a DTD and a +document body. Both the internal and the external subsets are extracted. + + + + + +In many cases, parse_document_entity is the preferred mode +to parse a document in a validating way, and +parse_wfdocument_entity is the mode of choice to parse a +file while only checking for well-formedness. 
+ + + +There are a number of variations of these modes. One important application of a +parser is to check documents of an untrusted source against a fixed DTD. One +solution is to not allow the <!DOCTYPE> clause in +these documents, and treat the document like a fragment (using mode +parse_content_entity). This is very simple, but +inflexible; users of such a system cannot even define additional entities to +abbreviate frequent phrases of their text. + + + +It may be necessary to have a more intelligent checker. For example, it is also +possible to parse the document to check fully, i.e. with DTD, and to compare +this DTD with the prescribed one. In order to fully parse the document, mode +parse_document_entity is applied, and to get the DTD to +compare with mode parse_dtd_entity can be used. + + + +There is another very important configurable aspect of the parser: the +so-called resolver. The task of the resolver is to locate the contents of an +(external) entity for a given entity name, and to make the contents accessible +as a character stream. (Furthermore, it also normalizes the character set; +but this is a detail we can ignore here.) Consider you have a file called +"main.xml" containing + + +%sub; +]]> + +and a file stored in the subdirectory "sub" with name +"sub.xml" containing + + +%subsub; +]]> + +and a file stored in the subdirectory "subsub" of +"sub" with name "subsub.xml" (the +contents of this file do not matter). Here, the resolver must track that +the second entity subsub is located in the directory +"sub/subsub", i.e. the difficulty is to interpret the +system (file) names of entities relative to the entities containing them, +even if the entities are deeply nested. + + + +There is not a fixed resolver already doing everything right - resolving entity +names is a task that highly depends on the environment. 
The XML specification +only demands that SYSTEM entities are interpreted like URLs +(which is not very precise, as there are lots of URL schemes in use), hoping +that this helps overcoming the local peculiarities of the environment; the idea +is that if you do not know your environment you can refer to other entities by +denoting URLs for them. I think that this interpretation of +SYSTEM names may have some applications in the internet, but +it is not the first choice in general. Because of this, the resolver is a +separate module of the parser that can be exchanged by another one if +necessary; more precisely, the parser already defines several resolvers. + + + +The following resolvers do already exist: + + + + Resolvers reading from arbitrary input channels. These +can be configured such that a certain ID is associated with the channel; in +this case inner references to external entities can be resolved. There is also +a special resolver that interprets SYSTEM IDs as URLs; this resolver can +process relative SYSTEM names and determine the corresponding absolute URL. + + + + A resolver that reads always from a given O'Caml +string. This resolver is not able to resolve further names unless the string is +not associated with any name, i.e. if the document contained in the string +refers to an external entity, this reference cannot be followed in this +case. + + + A resolver for file names. The SYSTEM +name is interpreted as file URL with the slash "/" as separator for +directories. - This resolver is derived from the generic URL resolver. + + + +The interface a resolver must have is documented, so it is possible to write +your own resolver. For example, you could connect the parser with an HTTP +client, and resolve URLs of the HTTP namespace. The resolver classes support +that several independent resolvers are combined to one more powerful resolver; +thus it is possible to combine a self-written resolver with the already +existing resolvers. 
+ + + +Note that the existing resolvers only interpret SYSTEM +names, not PUBLIC names. If it helps you, it is possible to +define resolvers for PUBLIC names, too; for example, such a +resolver could look up the public name in a hash table, and map it to a system +name which is passed over to the existing resolver for system names. It is +relatively simple to provide such a resolver. + + + + + + + Resolvers and sources + + + Using the built-in resolvers (called sources) + + The type source enumerates the two +possibilities where the document to parse comes from. + + +type source = + Entity of ((dtd -> Pxp_entity.entity) * Pxp_reader.resolver) + | ExtID of (ext_id * Pxp_reader.resolver) + + +You normally need not to worry about this type as there are convenience +functions that create source values: + + + + + from_file s: The document is read from +file s; you may specify absolute or relative path names. +The file name must be encoded as UTF-8 string. + + +There is an optional argument ~system_encoding +specifying the character encoding which is used for the names of the file +system. For example, if this encoding is ISO-8859-1 and s is +also a ISO-8859-1 string, you can form the source: + + + + + +This source has the advantage that +it is able to resolve inner external entities; i.e. if your document includes +data from another file (using the SYSTEM attribute), this +mode will find that file. However, this mode cannot resolve +PUBLIC identifiers nor SYSTEM identifiers +other than "file:". + + + + from_channel ch: The document is read +from the channel ch. In general, this source also supports +file URLs found in the document; however, by default only absolute URLs are +understood. It is possible to associate an ID with the channel such that the +resolver knows how to interpret relative URLs: + + +from_channel ~id:(System "file:///dir/dir1/") ch + + +There is also the ~system_encoding argument specifying how file names are +encoded. 
- The example from above can also be written (but it is no +longer possible to interpret relative URLs because there is no ~id argument, +and computing this argument is relatively complicated because it must +be a valid URL): + + +let ch = open_in s in +let src = from_channel ~system_encoding:`Enc_iso88591 ch in +...; +close_in ch + + + + + from_string s: The string +s is the document to parse. This mode is not able to +interpret file names of SYSTEM clauses, nor can it look up +PUBLIC identifiers. + + Normally, the encoding of the string is detected as usual +by analyzing the XML declaration, if any. However, it is also possible to +specify the encoding directly: + + +let src = from_string ~fixenc:`Enc_iso88592 s + + + + + ExtID (id, r): The document to parse +is denoted by the identifier id (either a +SYSTEM or PUBLIC clause), and this +identifier is interpreted by the resolver r. Use this mode +if you have written your own resolver. + Which character sets are possible depends on the passed +resolver r. + + + Entity (get_entity, r): The document +to parse is returned by the function invocation get_entity +dtd, where dtd is the DTD object to use (it may be +empty). Inner external references occurring in this entity are resolved using +the resolver r. + Which character sets are possible depends on the passed +resolver r. + + + + + + + The resolver API + + A resolver is an object that can be opened like a file, but you +do not pass the file name to the resolver, but the XML identifier of the entity +to read from (either a SYSTEM or PUBLIC +clause). When opened, the resolver must return the +Lexing.lexbuf that reads the characters. The resolver can +be closed, and it can be cloned. Furthermore, it is possible to tell the +resolver which character set it should assume.
- The following from Pxp_reader: + + unit + method init_warner : collect_warnings -> unit + method rep_encoding : rep_encoding + method open_in : ext_id -> Lexing.lexbuf + method close_in : unit + method change_encoding : string -> unit + method clone : resolver + method close_all : unit + end +]]> + +The resolver object must work as follows: + + + + + When the parser is called, it tells the resolver the +warner object and the internal encoding by invoking +init_warner and init_rep_encoding. The +resolver should store these values. The method rep_encoding +should return the internal encoding. + + + + If the parser wants to read from the resolver, it invokes +the method open_in. Either the resolver succeeds, in which +case the Lexing.lexbuf reading from the file or stream must +be returned, or opening fails. In the latter case the method implementation +should raise an exception (see below). + + + If the parser finishes reading, it calls the +close_in method. + + + If the parser finds a reference to another external +entity in the input stream, it calls clone to get a second +resolver which must be initially closed (not yet connected with an input +stream). The parser then invokes open_in and the other +methods as described. + + + If you already know the character set of the input +stream, you should recode it to the internal encoding, and define the method +change_encoding as an empty method. + + + If you want to support multiple external character sets, +the object must follow a much more complicated protocol. Directly after +open_in has been called, the resolver must return a lexical +buffer that only reads one byte at a time. This is only possible if you create +the lexical buffer with Lexing.from_function; the function +must then always return 1 if the EOF is not yet reached, and 0 if EOF is +reached. If the parser has read the first line of the document, it will invoke +change_encoding to tell the resolver which character set to +assume. 
From this moment, the object can return more than one byte at once. The +argument of change_encoding is either the parameter of the +"encoding" attribute of the XML declaration, or the empty string if there is +not any XML declaration or if the declaration does not contain an encoding +attribute. + + At the beginning the resolver must only return one +character every time something is read from the lexical buffer. The reason for +this is that you otherwise would not exactly know at which position in the +input stream the character set changes. + + If you want automatic recognition of the character set, +it is up to the resolver object to implement this. + + + If an error occurs, the parser calls the method +close_all for the top-level resolver; this method should +close itself (if not already done) and all clones. + + + + Exceptions + +It is possible to chain resolvers such that when the first resolver is not able +to open the entity, the other resolvers of the chain are tried in turn. The +method open_in should raise the exception +Not_competent to indicate that the next resolver should try +to open the entity. If the resolver is able to handle the ID, but some other +error occurs, the exception Not_resolvable should be raised +to force that the chain breaks. + + + + Example: How to define a resolver that is equivalent to +from_string: ... + + + + + Predefined resolver components + +There are some classes in Pxp_reader that define common resolver behaviour. + + + ?fixenc:encoding -> + ?auto_close:bool -> + in_channel -> + resolver +]]> + +Reads from the passed channel (it may be even a pipe). If the +~id argument is passed to the object, the created resolver +accepts only this ID. Otherwise all IDs are accepted. - Once the resolver has +been cloned, it does not accept any ID. This means that this resolver cannot +handle inner references to external entities. 
Note that you can combine this +resolver with another resolver that can handle inner references (such as +resolve_as_file); see class 'combine' below. - If you pass the +~fixenc argument, the encoding of the channel is set to the +passed value, regardless of any auto-recognition or any XML declaration. - If +~auto_close = true (which is the default), the channel is +closed after use. If ~auto_close = false, the channel is +left open. + + + + + channel_of_id:(ext_id -> (in_channel * encoding option)) -> + resolver +]]> + +This resolver calls the function ~channel_of_id to open a +new channel for the passed ext_id. This function must either +return the channel and the encoding, or it must fail with Not_competent. The +function must return None as encoding if the default +mechanism to recognize the encoding should be used. It must return +Some e if it is already known that the encoding of the +channel is e. If ~auto_close = true +(which is the default), the channel is closed after use. If +~auto_close = false, the channel is left open. + + + + + ?auto_close:bool -> + url_of_id:(ext_id -> Neturl.url) -> + channel_of_url:(Neturl.url -> (in_channel * encoding option)) -> + resolver +]]> + +When this resolver gets an ID to read from, it calls the function +~url_of_id to get the corresponding URL. This URL may be a +relative URL; however, a URL scheme must be used which contains a path. The +resolver converts the URL to an absolute URL if necessary. The second +function, ~channel_of_url, is fed with the absolute URL as +input. This function opens the resource to read from, and returns the channel +and the encoding of the resource. + + +Both functions, ~url_of_id and +~channel_of_url, can raise Not_competent to indicate that +the object is not able to read from the specified resource. However, there is a +difference: A Not_competent from ~url_of_id is left as it +is, but a Not_competent from ~channel_of_url is converted to +Not_resolvable. 
So only ~url_of_id decides which URLs are +accepted by the resolver and which not. + + +The function ~channel_of_url must return +None as encoding if the default mechanism to recognize the +encoding should be used. It must return Some e if it is +already known that the encoding of the channel is e. + + +If ~auto_close = true (which is the default), the channel is +closed after use. If ~auto_close = false, the channel is +left open. + + +Objects of this class contain a base URL relative to which relative URLs are +interpreted. When creating a new object, you can specify the base URL by +passing it as ~base_url argument. When an existing object is +cloned, the base URL of the clone is the URL of the original object. - Note +that the term "base URL" has a strict definition in RFC 1808. + + + + + ?fixenc:encoding -> + string -> + resolver +]]> + +Reads from the passed string. If the ~id argument is passed +to the object, the created resolver accepts only this ID. Otherwise all IDs are +accepted. - Once the resolver has been cloned, it does not accept any ID. This +means that this resolver cannot handle inner references to external +entities. Note that you can combine this resolver with another resolver that +can handle inner references (such as resolve_as_file); see class 'combine' +below. - If you pass the ~fixenc argument, the encoding of +the string is set to the passed value, regardless of any auto-recognition or +any XML declaration. + + + + (string * encoding option)) -> + resolver +]]> + +This resolver calls the function ~string_of_id to get the +string for the passed ext_id. This function must either +return the string and the encoding, or it must fail with Not_competent. The +function must return None as encoding if the default +mechanism to recognize the encoding should be used. It must return +Some e if it is already known that the encoding of the +string is e. 
+ + + + + ?host_prefix:[ `Not_recognized | `Allowed | `Required ] -> + ?system_encoding:encoding -> + ?url_of_id:(ext_id -> Neturl.url) -> + ?channel_of_url: (Neturl.url -> (in_channel * encoding option)) -> + unit -> + resolver +]]> +Reads from the local file system. Every file name is interpreted as +a file name of the local file system, and the referred file is read. + + +The full form of a file URL is: file://host/path, where +'host' specifies the host system where the file identified by 'path' +resides. host = "" or host = "localhost" are accepted; other values +will raise Not_competent. The standard for file URLs is +defined in RFC 1738. + + +Option ~file_prefix: Specifies how the "file:" prefix of +file names is handled: + + + `Not_recognized: The prefix is not +recognized. + + + `Allowed: The prefix is allowed but +not required (the default). + + + `Required: The prefix is +required. + + + + +Option ~host_prefix: Specifies how the "//host" phrase of +file names is handled: + + + `Not_recognized: The prefix is not +recognized. + + + `Allowed: The prefix is allowed but +not required (the default). + + + `Required: The prefix is +required. + + + + +Option ~system_encoding: Specifies the encoding of file +names of the local file system. Default: UTF-8. + + +Options ~url_of_id, ~channel_of_url: Not +for the casual user! + + + + + resolver list -> + resolver +]]> + +Combines several resolver objects. If a concrete entity with an +ext_id is to be opened, the combined resolver tries the +contained resolvers in turn until a resolver accepts opening the entity +(i.e. it does not raise Not_competent on open_in). + + +Clones: If the 'clone' method is invoked before 'open_in', all contained +resolvers are cloned separately and again combined. If the 'clone' method is +invoked after 'open_in' (i.e. while the resolver is open), additionally the +clone of the active resolver is flagged as being preferred, i.e. it is tried +first.
+ + + + + + + The DTD classes Sorry, not yet +written. Perhaps the interface definition of Pxp_dtd expresses the same: + + +&markup-dtd1.mli;&markup-dtd2.mli; + + + + + Invoking the parser + + Here a description of Pxp_yacc. + + + Defaults + The following defaults are available: + + +val default_config : config +val default_extension : ('a node extension) as 'a +val default_spec : ('a node extension as 'a) spec + + + + + + Parsing functions + In the following, the term "closed document" refers to +an XML structure like + + +<!DOCTYPE ... [ declarations ] > +<root> +... +</root> + + +The term "fragment" refers to an XML structure like + + +<root> +... +</root> + + +i.e. only to one isolated element instance. + + + + source -> dtd +]]> + +Parses the declarations which are contained in the entity, and returns them as +dtd object. + + + + source -> dtd +]]> + +Extracts the DTD from a closed document. Both the internal and the external +subsets are extracted and combined to one dtd object. This +function does not parse the whole document, but only the parts that are +necessary to extract the DTD. + + + + dtd) -> + ?id_index:('ext index) -> + config -> + source -> + 'ext spec -> + 'ext document +]]> + +Parses a closed document and validates it against the DTD that is contained in +the document (internal and external subsets). The option +~transform_dtd can be used to transform the DTD in the +document, and to use the transformed DTD for validation. If +~id_index is specified, an index of all ID attributes is +created. + + + + + source -> + 'ext spec -> + 'ext document +]]> + +Parses a closed document, but checks it only on well-formedness. + + + + + config -> + source -> + dtd -> + 'ext spec -> + 'ext node +]]> + +Parses a fragment, and validates the element. + + + + + source -> + 'ext spec -> + 'ext node +]]> + +Parses a fragment, but checks it only on well-formedness. 
+ + + + + Configuration options + + + + + + warner:The parser prints +warnings by invoking the method warn for this warner +object. (Default: all warnings are dropped) + + errors_with_line_numbers:If +true, errors contain line numbers; if false, errors contain only byte +positions. The latter mode is faster. (Default: true) + + enable_pinstr_nodes:If true, +the parser creates extra nodes for processing instructions. If false, +processing instructions are simply added to the element or document surrounding +the instructions. (Default: false) + + enable_super_root_node:If +true, the parser creates an extra node which is the parent of the root of the +document tree. This node is called super root; it is an element with type +T_super_root. - If there are processing instructions outside +the root element and outside the DTD, they are added to the super root instead +of the document. - If false, the super root node is not created. (Default: +false) + + enable_comment_nodes:If true, +the parser creates nodes for comments with type T_comment; +if false, such nodes are not created. (Default: false) + + encoding:Specifies the +internal encoding of the parser. Most strings are then represented according to +this encoding; however there are some exceptions (especially +ext_id values which are always UTF-8 encoded). +(Default: `Enc_iso88591) + + +recognize_standalone_declaration: If true and if the parser is +validating, the standalone="yes" declaration forces that it +is checked whether the document is a standalone document. - If false, or if the +parser is in well-formedness mode, such declarations are ignored. +(Default: true) + + + store_element_positions: If +true, for every non-data node the source position is stored. If false, the +position information is lost. If available, you can get the positions of nodes +by invoking the position method. 
+(Default: true) + + idref_pass: If true and if +there is an ID index, the parser checks whether every IDREF or IDREFS attribute +refers to an existing node; this requires that the parser traverses the whole +document tree. If false, this check is left out. (Default: false) + + validate_by_dfa: If true and if +the content model for an element type is deterministic, a deterministic finite +automaton is used to validate whether the element contents match the content +model of the type. If false, or if a DFA is not available, a backtracking +algorithm is used for validation. (Default: true) + + + +accept_only_deterministic_models: If true, only deterministic content +models are accepted; if false, any syntactically correct content models can be +processed. (Default: true) + + + + + + Which configuration should I use? + First, I recommend varying the default configuration instead of +creating a new configuration record. For instance, to set +idref_pass to true, change the default +as in: + +let config = { default_config with idref_pass = true } + +The background is that I can add more options to the record in future versions +of the parser without breaking your programs. + + + Do I need extra nodes for processing instructions? +By default, such nodes are not created. This does not mean that the +processing instructions are lost; however, you cannot find out the exact +location where they occur. For example, the following XML text + + +]]> + +will normally create one element node for x containing +one subnode for y. The processing +instructions are attached to x in a separate hash table; you +can access them using x # pinstr "pi1" and x # +pinstr "pi2", respectively. The information is lost where the +instructions occur within x. + + + + If the option enable_pinstr_nodes is +turned on, the parser creates extra nodes pi1 and +pi2 such that the subnodes of x are now: + + + +The extra nodes contain the processing instructions in the usual way, i.e.
you +can access them using pi1 # pinstr "pi1" and pi2 # +pinstr "pi2", respectively. + + + Note that you will need an exemplar for the PI nodes (see +make_spec_from_alist). + + + Do I need a super root node? + By default, there is no super root node. The +document object refers directly to the node representing the +root element of the document, i.e. + + + +if r is the root node. This is sometimes inconvenient: (1) +Some algorithms become simpler if every node has a parent, even the root +node. (2) Some standards such as XPath call the "root node" the node whose +child represents the root of the document. (3) The super root node can serve +as a container for processing instructions outside the root element. Because of +these reasons, it is possible to create an extra super root node, whose child +is the root node: + + + +When extra nodes are also created for processing instructions, these nodes can +be added to the super root node if they occur outside the root element (reason +(3)), and the order reflects the order in the source text. + + + Note that you will need an exemplar for the super root node +(see make_spec_from_alist). + + + What is the effect of the UTF-8 encoding? + By default, the parser represents strings (with few +exceptions) as ISO-8859-1 strings. These are well-known, and there are tools +and fonts for this encoding. + + However, internationalization may require that you switch over +to UTF-8 encoding. In most environments, the immediate effect will be that you +cannot read strings with character codes >= 160 any longer; your terminal will +only show funny glyph combinations. It is strongly recommended to install +Unicode fonts (GNU Unifont, + +Markus Kuhn's fonts) and terminal emulators +that can handle UTF-8 byte sequences. Furthermore, a Unicode editor may +be helpful (such as Yudit). There are +also FAQ by +Markus Kuhn. + + By setting encoding to +`Enc_utf8 all strings originating from the parsed XML +document are represented as UTF-8 strings. 
This includes not only character +data and attribute values but also element names, attribute names and so on, as +it is possible to use any Unicode letter to form such names. Strictly +speaking, PXP is only XML-compliant if the UTF-8 mode is used; otherwise it +will have difficulties when validating documents containing +non-ISO-8859-1-names. + + + This mode does not have any impact on the external +representation of documents. The character set assumed when reading a document +is set in the XML declaration, and the character set used when writing a document must +be passed to the write method. + + + + How do I check that nodes exist which are referred to by IDREF attributes? + First, you must create an index of all occurring ID +attributes: + + + +This index must be passed to the parsing function: + + index) + config source spec +]]> + +Next, you must turn on the idref_pass mode: + + + +Note that now the whole document tree will be traversed, and every node will be +checked for IDREF and IDREFS attributes. If the tree is big, this may take some +time. + + + + + What are deterministic content models? + These types of models can speed up the validation checks; +furthermore they ensure SGML-compatibility. In particular, a content model is +deterministic if the parser can determine the actually used alternative by +inspecting only the current token. For example, this element has +non-deterministic contents: + + +]]> + +If the first element in x is u, the +parser does not know which of the alternatives (u,v) or +(u,y+) will work; the parser must also inspect the second +element to be able to distinguish between the alternatives. Because such +look-ahead (or "guessing") is required, this example is +non-deterministic. + + + The XML standard demands that content models must be +deterministic. So it is recommended to turn the option +accept_only_deterministic_models on; however, PXP can also +process non-deterministic models using a backtracking algorithm.
+ + Deterministic models ensure that validation can be performed in +linear time. In order to get the maximum benefits, PXP also implements a +special validator that profits from deterministic models; this is the +deterministic finite automaton (DFA). This validator is enabled per element +type if the element type has a deterministic model and if the option +validate_by_dfa is turned on. + + In general, I expect that the DFA method is faster than the +backtracking method; especially in the worst case the DFA takes only linear +time. However, if the content model has only few alternatives and the +alternatives do not nest, the backtracking algorithm may be better. + + + + + + + + + Updates + + Some (often later added) features that are otherwise +not explained in the manual but worth to be mentioned. + + + Methods node_position, node_path, nth_node, +previous_node, next_node for nodes: See pxp_document.mli + + Functions to determine the document order of nodes: +compare, create_ord_index, ord_number, ord_compare: See pxp_document.mli + + + + + + + + + diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig new file mode 100644 index 000000000..445095f07 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/extension_general.fig @@ -0,0 +1,47 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2250 229 229 1575 2250 1800 2295 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 3375 225 225 1575 3375 1800 3375 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 675 3375 229 229 675 3375 900 3420 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2475 3375 229 229 2475 3375 2700 3420 +1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 2475 180 180 3600 2475 3780 2475 +1 3 0 1 0 7 100 0 10 0.000 1 0.0000 2880 2475 180 180 2880 2475 3060 2475 +1 3 0 1 0 7 100 0 10 0.000 1 0.0000 4320 2475 186 186 4320 2475 4500 2520 +1 3 0 1 0 7 100 0 10 0.000 1 0.0000 3600 1485 186 186 3600 
1485 3780 1530 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 675 3150 1395 2385 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 1575 2475 1575 3150 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 1755 2385 2475 3150 +2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 1537 2010 3412 1462 +2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 + 0 0 1.00 60.00 120.00 + 3412 1537 1672 2047 +2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 810 3195 2707 2512 +2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 1740 3217 3442 2580 +2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 1 1 2 + 0 0 1.00 60.00 120.00 + 0 0 1.00 60.00 120.00 + 2640 3210 4177 2610 +4 0 0 80 0 14 12 0.0000 4 75 105 3555 1530 x\001 +4 0 0 80 0 14 12 0.0000 4 75 105 1530 2295 n\001 +4 0 0 80 0 12 12 0.2967 4 135 1365 1658 1950 n # extension\001 +4 0 0 80 0 12 12 0.2967 4 135 840 2475 1950 x # node\001 +4 0 0 80 0 16 12 0.0000 4 135 1140 1020 4050 The node tree\001 +4 0 0 80 0 16 12 0.0000 4 135 1245 3225 3285 The extensions\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig new file mode 100644 index 000000000..071683488 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_add.fig @@ -0,0 +1,107 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 1350 242 229 6141 1350 6379 1395 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6141 2250 242 229 6141 2250 6379 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 5426 2250 242 229 5426 2250 5665 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 6856 2250 242 229 6856 2250 7094 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 7571 2925 242 229 7571 2925 7809 2970 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8524 2925 242 229 8524 2925 8762 2970 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8047 2250 242 229 8047 2250 8285 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1866 1350 242 229 1866 1350 2104 1395 +1 1 0 1 0 7 100 
0 15 0.000 1 0.0000 1866 2250 242 229 1866 2250 2104 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 1151 2250 242 229 1151 2250 1390 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 2581 2250 242 229 2581 2250 2819 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3296 2925 242 229 3296 2925 3534 2970 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 4249 2925 242 229 4249 2925 4487 2970 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 3772 2250 242 229 3772 2250 4010 2295 +1 1 0 1 0 7 100 0 15 0.000 1 0.0000 8325 1350 242 229 8325 1350 8563 1395 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.76 123.53 + 5910 1440 5402 2017 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.76 123.53 + 6109 1590 6101 2025 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.76 123.53 + 6307 1537 6697 2070 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.76 123.53 + 7832 2347 7602 2692 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.76 123.53 + 8150 2452 8349 2752 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.76 123.53 + 5490 2017 5958 1492 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.76 123.53 + 6164 2010 6173 1575 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.76 123.53 + 6768 2025 6355 1470 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.76 123.53 + 7673 2715 7880 2415 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.76 123.53 + 8412 2707 8222 2415 +2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2 + 6387 1372 8023 2017 +2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5 + 4950 900 9000 900 9000 3375 4950 3375 4950 900 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.75 123.51 + 1635 1440 1127 2017 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.75 123.51 + 1834 1590 1826 2025 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.75 123.51 + 2032 1537 2422 2070 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.75 123.51 + 3557 2347 3327 2692 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 61.75 123.51 + 3875 2452 4074 2752 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 
1 0 2 + 1 0 1.00 61.75 123.51 + 1215 2017 1683 1492 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.75 123.51 + 1889 2010 1898 1575 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.75 123.51 + 2493 2025 2080 1470 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.75 123.51 + 3398 2715 3605 2415 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 61.75 123.51 + 4137 2707 3947 2415 +2 1 1 1 0 7 95 0 15 4.000 0 0 -1 0 0 2 + 2112 1372 3748 2017 +2 2 0 1 0 7 95 0 -1 0.000 0 0 -1 0 0 5 + 675 900 4725 900 4725 3375 675 3375 675 900 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8197 1545 8055 2010 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 8137 2025 8280 1590 +2 1 0 3 0 7 95 0 -1 0.000 0 0 -1 1 0 4 + 2 1 2.00 120.00 180.00 + 7875 1500 7620 1965 7845 1920 7485 2355 +4 0 0 95 0 14 13 0.0000 4 79 111 6094 1379 x\001 +4 0 0 95 0 14 13 0.0000 4 111 111 7991 2265 y\001 +4 0 0 95 0 14 13 0.0000 4 79 111 1819 1379 x\001 +4 0 0 95 0 14 13 0.0000 4 111 111 3716 2265 y\001 +4 0 0 95 0 12 12 0.0000 4 150 1470 6459 1335 x # add_node y\001 +4 0 0 95 0 12 12 0.0000 4 150 1470 2214 1365 x # add_node y\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig new file mode 100644 index 000000000..ed1865f87 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_clone.fig @@ -0,0 +1,111 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 
1845 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6345 2700 229 229 6345 2700 6570 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 5895 3600 229 229 5895 3600 6120 3645 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 6795 3600 229 229 6795 3600 7020 3645 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645 +2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2 + 4050 2610 4725 2610 +2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2 + 4050 2745 4725 2745 +2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3 + 4500 2385 4950 2655 4500 2970 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2490 1905 2025 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2827 2002 3202 2542 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2115 2475 2535 1965 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 3255 2505 2872 1957 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6135 1905 5670 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6472 2002 6847 2542 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 5760 2475 6180 1965 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 6900 2505 6517 1957 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8160 1957 7860 2460 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8407 2032 8625 2520 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 7942 2467 8212 2010 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 8685 2475 8467 1987 +2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5 + 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 6382 2460 
6382 2032 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6307 2032 6307 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6180 2857 5880 3360 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6427 2932 6645 3420 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 5962 3367 6232 2910 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 6705 3375 6487 2887 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2737 2460 2737 2032 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2662 2032 2662 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2535 2857 2235 3360 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2782 2932 3000 3420 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2317 3367 2587 2910 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 3060 3375 2842 2887 +4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001 +4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001 +4 0 0 80 0 14 12 0.0000 4 75 105 6285 2752 x\001 +4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001 +4 0 0 80 0 12 12 0.0000 4 105 840 3690 2025 let x' =\001 +4 0 0 80 0 12 12 0.0000 4 150 1890 3690 2205 x # orphaned_clone\001 +4 0 0 80 0 14 12 0.0000 4 105 210 8235 1845 x'\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig new file mode 100644 index 000000000..a9fc87eef --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_delete.fig @@ -0,0 +1,96 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 2550 2092 2865 2407 +2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2 + 2595 2362 2820 2137 +2 1 0 4 0 7 80 0 -1 0.000 1 1 -1 0 0 2 + 2595 2137 2820 2362 +-6 +6 1980 2430 3420 3870 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2700 2700 229 229 2700 2700 2925 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 2250 3600 229 229 2250 3600 2475 3645 +1 3 0 1 0 
7 95 0 10 4.000 1 0.0000 3150 3600 229 229 3150 3600 3375 3645 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2535 2857 2235 3360 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2782 2932 3000 3420 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2317 3367 2587 2910 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 3060 3375 2842 2887 +-6 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2700 1800 229 229 2700 1800 2925 1845 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 2025 2700 229 229 2025 2700 2250 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 3375 2700 229 229 3375 2700 3600 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 6345 1800 229 229 6345 1800 6570 1845 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 5670 2700 229 229 5670 2700 5895 2745 +1 3 0 1 0 7 95 0 15 4.000 1 0.0000 7020 2700 229 229 7020 2700 7245 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8325 1800 229 229 8325 1800 8550 1845 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 7875 2700 229 229 7875 2700 8100 2745 +1 3 0 1 0 7 95 0 10 4.000 1 0.0000 8775 2700 229 229 8775 2700 9000 2745 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2737 2460 2737 2032 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2662 2032 2662 2467 +2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2 + 4050 2610 4725 2610 +2 1 0 5 0 7 95 0 -1 12.000 1 0 -1 0 0 2 + 4050 2745 4725 2745 +2 1 0 5 0 7 95 0 -1 12.000 1 1 -1 0 0 3 + 4500 2385 4950 2655 4500 2970 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2490 1905 2025 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2827 2002 3202 2542 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2115 2475 2535 1965 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 3255 2505 2872 1957 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6135 1905 5670 2467 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 6472 2002 6847 2542 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 
0 2 + 1 0 1.00 60.00 120.00 + 5760 2475 6180 1965 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 6900 2505 6517 1957 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8160 1957 7860 2460 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 8407 2032 8625 2520 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 7942 2467 8212 2010 +2 1 0 1 0 7 95 0 -1 4.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 8685 2475 8467 1987 +2 2 0 1 0 7 80 0 -1 4.000 0 0 -1 0 0 5 + 1575 1350 9225 1350 9225 4050 1575 4050 1575 1350 +4 0 0 80 0 14 12 0.0000 4 75 105 2640 2752 x\001 +4 0 0 95 0 12 12 0.0000 4 135 1050 3960 2250 x # delete\001 +4 0 0 80 0 14 12 0.0000 4 75 105 8280 1845 x\001 +4 0 0 80 0 14 12 0.0000 4 105 105 2655 1845 y\001 +4 0 0 80 0 14 12 0.0000 4 105 105 6300 1845 y\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig new file mode 100644 index 000000000..231e76da9 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_general.fig @@ -0,0 +1,35 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 2025 229 229 2025 2025 2250 2070 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1350 2025 225 225 1350 2025 1575 2025 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2700 2025 225 225 2700 2025 2925 2025 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2025 1125 225 225 2025 1125 2250 1125 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 1380 1800 1845 1275 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 1815 1207 1282 1815 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2055 1792 2055 1350 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 1980 1350 1980 1807 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 1 1.00 60.00 120.00 + 2190 1297 2550 1867 +2 1 0 1 0 7 100 0 15 0.000 0 0 -1 1 0 2 + 1 0 1.00 60.00 120.00 + 2602 1807 2220 1237 +2 2 0 1 
0 7 100 0 -1 0.000 0 0 -1 0 0 5 + 450 675 3150 675 3150 2475 450 2475 450 675 +4 0 0 100 0 12 10 0.0000 4 120 540 2377 1342 parent\001 +4 0 0 100 0 12 10 0.0000 4 105 810 645 1628 sub_nodes\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig new file mode 100644 index 000000000..54965fe63 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/pic/node_term.fig @@ -0,0 +1,63 @@ +#FIG 3.2 +Portrait +Center +Metric +A4 +100.00 +Single +-2 +1200 2 +6 1665 2700 2835 3150 +2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5 + 2835 3150 2835 2700 1665 2700 1665 3150 2835 3150 +4 0 0 80 0 18 12 0.0000 4 135 930 1815 3015 "Cherries"\001 +-6 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2250 1125 225 225 2250 1125 2475 1125 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 1575 2025 225 225 1575 2025 1800 2025 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 2925 2025 225 225 2925 2025 3150 2025 +1 3 0 1 0 7 100 0 15 0.000 1 0.0000 900 2925 242 242 900 2925 1125 3015 +2 4 0 1 0 7 100 0 15 0.000 0 0 7 0 0 5 + 1485 4275 1485 3825 315 3825 315 4275 1485 4275 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 2085 1275 1582 1807 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 2407 1297 2940 1800 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 1417 2190 900 2692 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 1740 2190 2257 2700 +2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 + 892 3180 892 3825 +2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 + 45 675 6525 675 6525 4950 45 4950 45 675 +3 3 0 1 0 7 100 0 -1 0.000 0 0 0 22 + 2115 3645 2250 3600 2520 3555 2745 3510 2925 3555 3150 3690 + 3375 3735 3600 3735 3825 3735 4140 3825 4140 4005 4005 4185 + 3735 4230 3420 4185 3150 4230 2835 4275 2520 4230 2340 4140 + 2115 4095 1980 4005 1980 3825 2025 3735 + -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 + -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 + -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 +3 3 0 1 0 7 100 0 -1 0.000 0 0 0 17 + 3465 1170 3645 1080 4050 1035 4320 1035 4545 1080 4770 
1170 + 5130 1215 5355 1350 5400 1530 5265 1665 4860 1710 4455 1710 + 4095 1665 3780 1620 3555 1575 3420 1485 3420 1305 + -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 + -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 + -1.000 +3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5 + 2475 1215 2655 1350 2970 1440 3240 1395 3420 1260 + 0.000 -1.000 -1.000 -1.000 0.000 +3 2 0 1 0 7 100 0 -1 0.000 0 0 0 5 + 1125 3060 1215 3397 1410 3607 1687 3727 2025 3720 + 0.000 -1.000 -1.000 -1.000 0.000 +4 0 0 80 0 18 12 0.0000 4 180 1065 375 4125 "An orange"\001 +4 0 0 80 0 18 12 0.0000 4 90 315 750 2985 \001 +4 0 0 80 0 18 12 0.0000 4 135 315 1410 2085 \001 +4 0 0 80 0 18 12 0.0000 4 90 315 2790 2070 \001 +4 0 0 80 0 18 12 0.0000 4 90 315 2100 1200 \001 +4 0 0 100 0 16 12 0.0000 4 135 795 3600 1260 attributes:\001 +4 0 0 100 0 16 12 0.0000 4 180 1680 3600 1485 "att" -> Value "apple"\001 +4 0 0 100 0 16 12 0.0000 4 135 795 2250 3780 attributes:\001 +4 0 0 100 0 17 12 0.0000 4 180 5910 390 4725 An orangeCherries\001 +4 0 0 100 0 16 12 0.0000 4 180 1800 2250 4005 "att" -> Value "orange"\001 diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent new file mode 100644 index 000000000..e9fdfc35a --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/readme.ent @@ -0,0 +1,364 @@ + + + + + + + + + + + + + + + diff --git a/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent b/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent new file mode 100644 index 000000000..604918bd8 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/doc/manual/src/yacc.mli.ent @@ -0,0 +1,376 @@ + diff --git a/helm/DEVEL/pxp/pxp/examples/Makefile b/helm/DEVEL/pxp/pxp/examples/Makefile new file mode 100644 index 000000000..934385757 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/Makefile @@ -0,0 +1,22 @@ +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: CLEAN +CLEAN: clean + $(MAKE) -C xmlforms CLEAN + $(MAKE) -C validate CLEAN + $(MAKE) -C readme CLEAN + $(MAKE) -C 
simple_transformation CLEAN + +.PHONY: distclean +distclean: clean + rm -f *~ + $(MAKE) -C xmlforms distclean + $(MAKE) -C validate distclean + $(MAKE) -C readme distclean + $(MAKE) -C simple_transformation distclean + + diff --git a/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore b/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore new file mode 100644 index 000000000..2395c1946 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/.cvsignore @@ -0,0 +1,10 @@ +*.cmi +*.cmo +*.cma +*.cmx +*.o +*.a +*.cmxa +depend +depend.pkg + diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile b/helm/DEVEL/pxp/pxp/examples/readme/Makefile new file mode 100644 index 000000000..df5f6ed0d --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/Makefile @@ -0,0 +1,34 @@ +# make readme: make bytecode executable +# make readme.opt: make native executable +# make clean: remove intermediate files +# make CLEAN: remove intermediate files (recursively) +# make distclean: remove any superflous files +# make install +#---------------------------------------------------------------------- + +BIN = /usr/local/bin + +.PHONY: readme +readme: + $(MAKE) -f Makefile.code readme + +.PHONY: readme.opt +readme.opt: + $(MAKE) -f Makefile.code readme.opt + + +.PHONY: clean +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa + +.PHONY: CLEAN +CLEAN: clean + +.PHONY: distclean +distclean: clean + rm -f *~ depend depend.pkg + rm -f readme readme.opt + +.PHONY: install +install: + cp readme $(BIN) diff --git a/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code b/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code new file mode 100644 index 000000000..0514ddf33 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/Makefile.code @@ -0,0 +1,57 @@ +#---------------------------------------------------------------------- +# specific rules for this package: + +OBJECTS = to_html.cmo to_text.cmo +XOBJECTS = $(OBJECTS:.cmo=.cmx) +ARCHIVE = readme.cma +XARCHIVE = readme.cmxa +NAME = readme +REQUIRES = str pxp + +readme: 
$(ARCHIVE) main.cmo + ocamlfind ocamlc -o readme -custom -package "$(REQUIRES)" \ + -linkpkg $(ARCHIVE) main.cmo + +readme.opt: $(XARCHIVE) main.cmx + ocamlfind ocamlopt -o readme.opt -custom -package "$(REQUIRES)" \ + -linkpkg $(XARCHIVE) main.cmx + +$(ARCHIVE): $(OBJECTS) + $(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS) + +$(XARCHIVE): $(XOBJECTS) + $(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS) + +#---------------------------------------------------------------------- +# general rules: + +OPTIONS = +OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS) +OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS) +OCAMLDEP = ocamldep $(OPTIONS) +OCAMLFIND = ocamlfind + +depend: *.ml *.mli + $(OCAMLDEP) *.ml *.mli >depend + +depend.pkg: Makefile + $(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg + +.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly + +.ml.cmx: + $(OCAMLOPT) -c $< + +.ml.cmo: + $(OCAMLC) -c $< + +.mli.cmi: + $(OCAMLC) -c $< + +.mll.ml: + ocamllex $< + +*.mli: + +include depend +include depend.pkg diff --git a/helm/DEVEL/pxp/pxp/examples/readme/main.ml b/helm/DEVEL/pxp/pxp/examples/readme/main.ml new file mode 100644 index 000000000..4e3837aa9 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/main.ml @@ -0,0 +1,108 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Pxp_types +open Pxp_document +open Pxp_yacc + + +let rec print_error e = + prerr_endline(string_of_exn e) +;; + + +let run f a = + try f a with + e -> print_error e +;; + + +let convert_to_html filename = + (* read in style definition *) + let document = + parse_document_entity + { default_config with encoding = `Enc_iso88591 } + (from_file filename) + To_html.tag_map + in + let root = document # root in + let store = new To_html.store in + root # extension # to_html store stdout +;; + + +let convert_to_text filename = + (* read in style definition *) + let document = + parse_document_entity + default_config + (from_file filename) + To_text.tag_map + in + let root = 
document # root in + let store = new To_text.store in + let box = new To_text.box 79 79 in + root # extension # to_box store box; + box # output 0 0 stdout +;; + + +let main() = + let want_html = ref false in + let want_text = ref false in + let filename = ref None in + Arg.parse + [ "-html", Arg.Set want_html, + " convert file to html"; + "-text", Arg.Set want_text, + " convert file to text"; + ] + (fun s -> + match !filename with + None -> filename := Some s + | Some _ -> + raise (Arg.Bad "Multiple arguments not allowed.")) + "usage: readme [ -text | -html ] input.xml >output"; + let fn = + match !filename with + None -> + prerr_endline "readme: no input"; + exit 1 + | Some s -> s + in + match !want_html, !want_text with + true, false -> + run convert_to_html fn + | false, true -> + run convert_to_text fn + | _ -> + prerr_endline ("readme: Please select exactly one output format") +;; + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:31 lpadovan + * Initial revision + * + * Revision 1.5 2000/07/08 17:58:17 gerd + * Updated because of PXP API changes. + * + * Revision 1.4 2000/06/04 20:25:38 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.3 2000/05/01 16:46:40 gerd + * Using the new error formatter. + * + * Revision 1.2 1999/08/23 16:54:19 gerd + * Minor changes. + * + * Revision 1.1 1999/08/22 22:29:32 gerd + * Initial revision. 
+ * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd b/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd new file mode 100644 index 000000000..8ff6a9f75 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/readme.dtd @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml new file mode 100644 index 000000000..f717b2259 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/to_html.ml @@ -0,0 +1,432 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +(*$ readme.code.header *) +open Pxp_types +open Pxp_document +(*$-*) + + +(*$ readme.code.footnote-printer *) +class type footnote_printer = + object + method footnote_to_html : store_type -> out_channel -> unit + end + +and store_type = + object + method alloc_footnote : footnote_printer -> int + method print_footnotes : out_channel -> unit + end +;; +(*$-*) + + +(*$ readme.code.store *) +class store = + object (self) + + val mutable footnotes = ( [] : (int * footnote_printer) list ) + val mutable next_footnote_number = 1 + + method alloc_footnote n = + let number = next_footnote_number in + next_footnote_number <- number+1; + footnotes <- footnotes @ [ number, n ]; + number + + method print_footnotes ch = + if footnotes <> [] then begin + output_string ch "
\n"; + output_string ch "
\n"; + List.iter + (fun (_,n) -> + n # footnote_to_html (self : #store_type :> store_type) ch) + footnotes; + output_string ch "
\n"; + end + + end +;; +(*$-*) + + + +(*$ readme.code.escape-html *) +let escape_html s = + Str.global_substitute + (Str.regexp "<\\|>\\|&\\|\"") + (fun s -> + match Str.matched_string s with + "<" -> "<" + | ">" -> ">" + | "&" -> "&" + | "\"" -> """ + | _ -> assert false) + s +;; +(*$-*) + + +(*$ readme.code.shared *) +class virtual shared = + object (self) + + (* --- default_ext --- *) + + val mutable node = (None : shared node option) + + method clone = {< >} + method node = + match node with + None -> + assert false + | Some n -> n + method set_node n = + node <- Some n + + (* --- virtual --- *) + + method virtual to_html : store -> out_channel -> unit + + end +;; +(*$-*) + + +(*$ readme.code.only-data *) +class only_data = + object (self) + inherit shared + + method to_html store ch = + output_string ch (escape_html (self # node # data)) + end +;; +(*$-*) + + +(*$ readme.code.no-markup *) +class no_markup = + object (self) + inherit shared + + method to_html store ch = + List.iter + (fun n -> n # extension # to_html store ch) + (self # node # sub_nodes) + end +;; +(*$-*) + + +(*$ readme.code.readme *) +class readme = + object (self) + inherit shared + + method to_html store ch = + (* output header *) + output_string + ch ""; + output_string + ch "\n"; + let title = + match self # node # attribute "title" with + Value s -> s + | _ -> assert false + in + let html_header, _ = + try (self # node # dtd # par_entity "readme:html:header") + # replacement_text + with WF_error _ -> "", false in + let html_trailer, _ = + try (self # node # dtd # par_entity "readme:html:trailer") + # replacement_text + with WF_error _ -> "", false in + let html_bgcolor, _ = + try (self # node # dtd # par_entity "readme:html:bgcolor") + # replacement_text + with WF_error _ -> "white", false in + let html_textcolor, _ = + try (self # node # dtd # par_entity "readme:html:textcolor") + # replacement_text + with WF_error _ -> "", false in + let html_alinkcolor, _ = + try (self # node # dtd # 
par_entity "readme:html:alinkcolor") + # replacement_text + with WF_error _ -> "", false in + let html_vlinkcolor, _ = + try (self # node # dtd # par_entity "readme:html:vlinkcolor") + # replacement_text + with WF_error _ -> "", false in + let html_linkcolor, _ = + try (self # node # dtd # par_entity "readme:html:linkcolor") + # replacement_text + with WF_error _ -> "", false in + let html_background, _ = + try (self # node # dtd # par_entity "readme:html:background") + # replacement_text + with WF_error _ -> "", false in + + output_string ch "
\n"; + output_string ch (escape_html title); + output_string ch "
\n"; + output_string ch " + if value <> "" then + output_string ch (name ^ "=\"" ^ escape_html value ^ "\" ")) + [ "bgcolor", html_bgcolor; + "text", html_textcolor; + "link", html_linkcolor; + "alink", html_alinkcolor; + "vlink", html_vlinkcolor; + ]; + output_string ch ">\n"; + output_string ch html_header; + output_string ch "

"; + output_string ch (escape_html title); + output_string ch "

\n"; + (* process main content: *) + List.iter + (fun n -> n # extension # to_html store ch) + (self # node # sub_nodes); + (* now process footnotes *) + store # print_footnotes ch; + (* trailer *) + output_string ch html_trailer; + output_string ch "\n"; + + end +;; +(*$-*) + + +(*$ readme.code.section *) +class section the_tag = + object (self) + inherit shared + + val tag = the_tag + + method to_html store ch = + let sub_nodes = self # node # sub_nodes in + match sub_nodes with + title_node :: rest -> + output_string ch ("<" ^ tag ^ ">\n"); + title_node # extension # to_html store ch; + output_string ch ("\n"); + List.iter + (fun n -> n # extension # to_html store ch) + rest + | _ -> + assert false + end +;; + +class sect1 = section "h1";; +class sect2 = section "h3";; +class sect3 = section "h4";; +(*$-*) + + +(*$ readme.code.map-tag *) +class map_tag the_target_tag = + object (self) + inherit shared + + val target_tag = the_target_tag + + method to_html store ch = + output_string ch ("<" ^ target_tag ^ ">\n"); + List.iter + (fun n -> n # extension # to_html store ch) + (self # node # sub_nodes); + output_string ch ("\n"); + end +;; + +class p = map_tag "p";; +class em = map_tag "b";; +class ul = map_tag "ul";; +class li = map_tag "li";; +(*$-*) + + +(*$ readme.code.br *) +class br = + object (self) + inherit shared + + method to_html store ch = + output_string ch "
\n"; + List.iter + (fun n -> n # extension # to_html store ch) + (self # node # sub_nodes); + end +;; +(*$-*) + + +(*$ readme.code.code *) +class code = + object (self) + inherit shared + + method to_html store ch = + let data = self # node # data in + (* convert tabs *) + let l = String.length data in + let rec preprocess i column = + (* this is very ineffective but comprehensive: *) + if i < l then + match data.[i] with + '\t' -> + let n = 8 - (column mod 8) in + String.make n ' ' ^ preprocess (i+1) (column + n) + | '\n' -> + "\n" ^ preprocess (i+1) 0 + | c -> + String.make 1 c ^ preprocess (i+1) (column + 1) + else + "" + in + output_string ch "

";
+      output_string ch (escape_html (preprocess 0 0));
+      output_string ch "

"; + + end +;; +(*$-*) + + +(*$ readme.code.a *) +class a = + object (self) + inherit shared + + method to_html store ch = + output_string ch " escape_html v + | Valuelist _ -> assert false + | Implied_value -> + begin match self # node # attribute "readmeref" with + Value v -> escape_html v ^ ".html" + | Valuelist _ -> assert false + | Implied_value -> + "" + end + in + if href <> "" then + output_string ch ("href=\"" ^ href ^ "\""); + output_string ch ">"; + output_string ch (escape_html (self # node # data)); + output_string ch ""; + + end +;; +(*$-*) + + +(*$ readme.code.footnote *) +class footnote = + object (self) + inherit shared + + val mutable footnote_number = 0 + + method to_html store ch = + let number = + store # alloc_footnote (self : #shared :> footnote_printer) in + let foot_anchor = + "footnote" ^ string_of_int number in + let text_anchor = + "textnote" ^ string_of_int number in + footnote_number <- number; + output_string ch ( "[" ^ string_of_int number ^ + "]" ) + + method footnote_to_html store ch = + (* prerequisite: we are in a definition list
...
*) + let foot_anchor = + "footnote" ^ string_of_int footnote_number in + let text_anchor = + "textnote" ^ string_of_int footnote_number in + output_string ch ("
[" ^ string_of_int footnote_number ^ + "]
\n
"); + List.iter + (fun n -> n # extension # to_html store ch) + (self # node # sub_nodes); + output_string ch ("\n
") + + end +;; +(*$-*) + + +(**********************************************************************) + +(*$ readme.code.tag-map *) +open Pxp_yacc + +let tag_map = + make_spec_from_alist + ~data_exemplar:(new data_impl (new only_data)) + ~default_element_exemplar:(new element_impl (new no_markup)) + ~element_alist: + [ "readme", (new element_impl (new readme)); + "sect1", (new element_impl (new sect1)); + "sect2", (new element_impl (new sect2)); + "sect3", (new element_impl (new sect3)); + "title", (new element_impl (new no_markup)); + "p", (new element_impl (new p)); + "br", (new element_impl (new br)); + "code", (new element_impl (new code)); + "em", (new element_impl (new em)); + "ul", (new element_impl (new ul)); + "li", (new element_impl (new li)); + "footnote", (new element_impl (new footnote : #shared :> shared)); + "a", (new element_impl (new a)); + ] + () +;; +(*$-*) + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:31 lpadovan + * Initial revision + * + * Revision 1.6 2000/08/22 14:34:25 gerd + * Using make_spec_from_alist instead of make_spec_from_mapping. + * + * Revision 1.5 2000/08/18 21:15:14 gerd + * Update because of PXP API change: par_entity raises WF_error + * instead of Validation error if the entity is not defined. + * Further minor updates. + * + * Revision 1.4 2000/07/08 17:58:17 gerd + * Updated because of PXP API changes. + * + * Revision 1.3 2000/06/04 20:25:38 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.2 1999/09/12 20:09:32 gerd + * Added section marks. + * + * Revision 1.1 1999/08/22 22:29:32 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml b/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml new file mode 100644 index 000000000..fc45f45cd --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/readme/to_text.ml @@ -0,0 +1,599 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Pxp_types +open Pxp_document + + +(**********************************************************************) +(* The box class represents formatted text *) +(**********************************************************************) + +class type formatted_text = + object + method output : int -> int -> out_channel -> unit + (* output initial_indent indent ch: + * 'initial_indent' is how far the first line should be indented; + * 'indent' how far the rest. 'ch' is the channel on which the lines + * are to be printed. + *) + + method multiline : bool + (* whether the box occupies multiple lines *) + + method width_of_last_line : int + (* returns the width of the last line *) + end +;; + + +type text = + Text of string + | Box of formatted_text +;; + + +let textwidth tl = + let rec compute tl r = + match tl with + [] -> r + | t :: tl' -> + begin match t with + Text s -> + compute tl' (r + String.length s) + | Box b -> + if b # multiline then + compute tl' (b # width_of_last_line) + else + compute tl' (r + b # width_of_last_line) + end + in + compute (List.rev tl) 0 +;; + + +class box the_initial_width the_width = + object (self) + + (* The 'initial_width' is the width that is available on the first + * line of output; the 'width' is the width that is available in the + * rest. 
+ *) + + val initial_width = the_initial_width + val width = the_width + + (* state: *) + + val mutable space_added = false + val mutable linefeed_added = false + val mutable is_first_line = true + val mutable lines = [] + (* lines in reverse order (first line = last element) *) + val mutable current_line = [] + (* not member of 'lines'; again reverse order *) + val mutable current_indent = 0 + + method add_space = + if not space_added then begin + space_added <- true; + linefeed_added <- true; + current_line <- Text " " :: current_line + end + + method ignore_space = + space_added <- true; + linefeed_added <- true + + method add_linefeed = + if not linefeed_added then begin + linefeed_added <- true; + if not space_added then + current_line <- Text " " :: current_line + end + + method ignore_linefeed = + linefeed_added <- true + + method add_newline = + lines <- current_line :: lines; + current_line <- []; + space_added <- true; + linefeed_added <- true; + is_first_line <- false; + current_indent <- 0; + + method add_word s = + (* first try to add 's' to 'current_line' *) + let current_line' = Text s :: current_line in + let current_width = + if is_first_line then initial_width else width in + if textwidth current_line' + current_indent <= current_width then begin + (* ok, the line does not become too long *) + current_line <- current_line'; + space_added <- false; + linefeed_added <- false + end + else begin + (* The line would be too long. 
*) + lines <- current_line :: lines; + current_line <- [Text s]; + space_added <- false; + linefeed_added <- false; + is_first_line <- false; + current_indent <- 0; + end + + method add_box b = + current_line <- Box b :: current_line; + space_added <- false; + linefeed_added <- false; + + + method width_of_last_line = + textwidth current_line + current_indent + + + method available_width = + let current_width = + if is_first_line then initial_width else width in + current_width - textwidth current_line - current_indent + + + method multiline = + lines <> [] or + (List.exists + (function + Text _ -> false + | Box b -> b # multiline) + current_line) + + method output initial_indent indent ch = + let eff_lines = + List.rev + (current_line :: lines) in + let rec out_lines cur_indent ll = + match ll with + [] -> () + | l :: ll' -> + output_string ch (String.make cur_indent ' '); + List.iter + (function + Text s -> + output_string ch s + | Box b -> + b # output 0 indent ch + ) + (List.rev l); + if ll' <> [] then + output_string ch "\n"; + out_lines indent ll' + in + out_lines initial_indent eff_lines + end +;; + + +class listitem_box listmark indent totalwidth = + let initial_newline = String.length listmark >= indent in + object (self) + inherit box totalwidth (totalwidth - indent) as super + + val extra_indent = indent + + initializer + self # add_word listmark; + if initial_newline then + self # add_newline + else begin + current_line <- Text (String.make (indent - String.length listmark) ' ') + :: current_line; + space_added <- true; + linefeed_added <- true; + end + + + method output initial_indent indent ch = + super # output initial_indent (indent + extra_indent) ch + end +;; + + +(**********************************************************************) +(* Footnotes etc. 
*) +(**********************************************************************) + + +class type footnote_printer = + object + method footnote_to_box : store_type -> box -> unit + end + +and store_type = + object + method alloc_footnote : footnote_printer -> int + method print_footnotes : box -> unit + end +;; + + +class store = + object (self) + + val mutable footnotes = ( [] : (int * footnote_printer) list ) + val mutable next_footnote_number = 1 + + method alloc_footnote n = + let number = next_footnote_number in + next_footnote_number <- number+1; + footnotes <- footnotes @ [ number, n ]; + number + + method print_footnotes (b : box) = + if footnotes <> [] then begin + b # add_newline; + b # add_newline; + let w = b # available_width in + b # add_word (String.make (w/3) '-'); + b # add_newline; + b # add_newline; + List.iter + (fun (_,n) -> + n # footnote_to_box (self : #store_type :> store_type) b) + footnotes; + b # add_newline; + end + end +;; + + + +(**********************************************************************) +(* The extension objects *) +(**********************************************************************) + + +class virtual shared = + object (self) + + (* --- default_ext --- *) + + val mutable node = (None : shared node option) + + method clone = {< >} + method node = + match node with + None -> + assert false + | Some n -> n + method set_node n = + node <- Some n + + (* --- virtual --- *) + + method virtual to_box : store -> box -> unit + (* to_box store b: + * formats the element using box 'b' + *) + end +;; + + +class only_data = + object (self) + inherit shared + + val white_space_re = Str.regexp "[ \t]+\\|\n" + + method to_box store b = + let s = self # node # data in + let splitted = Str.full_split white_space_re s in + List.iter + (function + Str.Delim "\n" -> + b # add_linefeed + | Str.Delim _ -> + b # add_space + | Str.Text s -> + b # add_word s) + splitted + end +;; + + +class no_markup = + object (self) + inherit shared + + method 
to_box store b = + List.iter + (fun n -> n # extension # to_box store b) + (self # node # sub_nodes) + end +;; + + +class readme = + object (self) + inherit shared + + method to_box store b = + let title = + match self # node # attribute "title" with + Value s -> s + | _ -> assert false + in + let w = b # available_width in + let line = String.make (w-1) '*' in + b # add_word line; + b # add_newline; + b # add_word title; + b # add_newline; + b # add_word line; + b # add_newline; + b # add_newline; + (* process main content: *) + List.iter + (fun n -> n # extension # to_box store b) + (self # node # sub_nodes); + (* now process footnotes *) + store # print_footnotes b; + (* trailer *) + b # add_newline; + end +;; + + +class section the_tag = + object (self) + inherit shared + + val tag = the_tag + + method to_box store b = + let sub_nodes = self # node # sub_nodes in + match sub_nodes with + title_node :: rest -> + b # add_newline; + let w = b # available_width in + let line = String.make (w-1) tag in + b # add_word line; + b # add_newline; + b # add_word (title_node # data); + b # add_newline; + b # add_word line; + b # add_newline; + List.iter + (fun n -> + n # extension # to_box store b) + rest; + | _ -> + assert false + end +;; + +class sect1 = section '=';; +class sect2 = section '-';; +class sect3 = section ':';; + + +class p = + object (self) + inherit shared + + method to_box store b = + let within_list = + match self # node # parent # node_type with + T_element "li" -> true + | T_element _ -> false + | _ -> assert false + in + if not within_list then + b # add_newline; + let w = b # available_width in + let b' = new box w w in + b' # ignore_space; + List.iter + (fun n -> n # extension # to_box store b') + (self # node # sub_nodes); + b # add_box (b' :> formatted_text); + b # add_newline; + end +;; + + +class li = + object (self) + inherit shared + + method to_box store b = + b # add_newline; + let w = b # available_width in + let b' = new listitem_box "-" 
3 w in + b' # ignore_space; + List.iter + (fun n -> n # extension # to_box store b') + (self # node # sub_nodes); + b # add_box (b' :> formatted_text); + end +;; + + +class code = + object (self) + inherit shared + + method to_box store b = + b # add_newline; + let w = b # available_width in + let b' = new box w w in + b' # ignore_space; + let data = self # node # data in + (* convert tabs *) + let l = String.length data in + let rec add s i column = + (* this is very ineffective but comprehensive: *) + if i < l then + match data.[i] with + '\t' -> + let n = 8 - (column mod 8) in + add (s ^ String.make n ' ') (i+1) (column + n) + | '\n' -> + b' # add_word s; + b' # add_newline; + add "" (i+1) 0 + | c -> + add (s ^ String.make 1 c) (i+1) (column + 1) + else + if s <> "" then begin + b' # add_word s; + b' # add_newline; + end + in + add "" 0 0; + b # add_box (b' :> formatted_text); + b # add_newline; + end +;; + + +class br = + object (self) + inherit shared + + method to_box store b = + b # add_newline; + end +;; + + +class footnote = + object (self) + inherit shared + + val mutable footnote_number = 0 + + method to_box store b = + let number = + store # alloc_footnote (self : #shared :> footnote_printer) in + footnote_number <- number; + b # add_space; + b # add_word ("[" ^ string_of_int number ^ "]"); + + method footnote_to_box store b = + let w = b # available_width in + let n = "[" ^ string_of_int footnote_number ^ "]" in + let b' = new listitem_box n 6 w in + b' # ignore_space; + List.iter + (fun n -> n # extension # to_box store b') + (self # node # sub_nodes); + b # add_box (b' :> formatted_text); + b # add_newline; + b # add_newline; + + end +;; + + +class a = + object (self) + inherit shared + + val mutable footnote_number = 0 + val mutable a_href = "" + + method to_box store b = + let href = + match self # node # attribute "href" with + Value v -> "see " ^ v + | Valuelist _ -> assert false + | Implied_value -> + begin match self # node # attribute 
"readmeref" with + Value v -> "see file " ^ v + | Valuelist _ -> assert false + | Implied_value -> + "" + end + in + a_href <- href; + List.iter + (fun n -> n # extension # to_box store b) + (self # node # sub_nodes); + if href <> "" then begin + let number = + store # alloc_footnote (self : #shared :> footnote_printer) in + footnote_number <- number; + b # add_space; + b # add_word ("[" ^ string_of_int number ^ "]"); + end + + method footnote_to_box store b = + if a_href <> "" then begin + let w = b # available_width in + let n = "[" ^ string_of_int footnote_number ^ "]" in + let b' = new listitem_box n 6 w in + b' # ignore_space; + b' # add_word a_href; + b # add_box (b' :> formatted_text); + b # add_newline; + b # add_newline; + end + end +;; + +(**********************************************************************) + +open Pxp_yacc + +let tag_map = + make_spec_from_alist + ~data_exemplar:(new data_impl (new only_data)) + ~default_element_exemplar:(new element_impl (new no_markup)) + ~element_alist: + [ "readme", (new element_impl (new readme)); + "sect1", (new element_impl (new sect1)); + "sect2", (new element_impl (new sect2)); + "sect3", (new element_impl (new sect3)); + "title", (new element_impl (new no_markup)); + "p", (new element_impl (new p)); + "br", (new element_impl (new br)); + "code", (new element_impl (new code)); + "em", (new element_impl (new no_markup)); + "ul", (new element_impl (new no_markup)); + "li", (new element_impl (new li)); + "footnote", (new element_impl (new footnote : #shared :> shared)); + "a", (new element_impl (new a : #shared :> shared)); + ] + () +;; + + + + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:31 lpadovan + * Initial revision + * + * Revision 1.5 2000/08/22 14:34:25 gerd + * Using make_spec_from_alist instead of make_spec_from_mapping. + * + * Revision 1.4 2000/08/18 21:15:25 gerd + * Minor updates because of PXP API changes. 
+ * + * Revision 1.3 2000/07/08 17:58:17 gerd + * Updated because of PXP API changes. + * + * Revision 1.2 2000/06/04 20:25:38 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.1 1999/08/22 22:29:32 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile b/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile new file mode 100644 index 000000000..27be18c30 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/Makefile @@ -0,0 +1,21 @@ +all: print sort delcol + +print: print.ml + ocamlfind ocamlc -o print -package pxp -linkpkg -custom \ + -predicates pxp_without_utf8 print.ml + +sort: sort.ml + ocamlfind ocamlc -o sort -package pxp -linkpkg -custom \ + -predicates pxp_without_utf8 sort.ml + +delcol: delcol.ml + ocamlfind ocamlc -o delcol -package pxp -linkpkg -custom \ + -predicates pxp_without_utf8 delcol.ml + +clean: + rm -f *.cmo *.cma *.cmi *.cmxa *.a *.o + +distclean: clean + rm -f *~ print sort delcol + +CLEAN: clean diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/README b/helm/DEVEL/pxp/pxp/examples/simple_transformation/README new file mode 100644 index 000000000..5b9212862 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/README @@ -0,0 +1,17 @@ +Usage: + sort -by phone + match n # node_type with + T_element name when name = col -> + raise Skip + | _ -> n # orphaned_flat_clone) + tree +;; + + +let main() = + let column = ref "" in + Arg.parse + [ "-col", Arg.String (fun s -> column := s), + " (last-name|first-name|phone)"; + ] + (fun _ -> raise (Arg.Bad "Bad usage")) + "usage: sort [ options ]"; + if !column = "" then ( + prerr_endline "Column not specified!"; + exit 1; + ); + if not(List.mem !column ["last-name"; "first-name"; "phone"]) then ( + prerr_endline ("Unknown column: " ^ !column); + exit 1 + ); + try + let dtd = parse_dtd_entity default_config (from_file "record.dtd") in + let tree = + parse_content_entity 
default_config (from_channel stdin) dtd default_spec + in + print_endline ""; + (delcol !column tree) # write (Out_channel stdout) `Enc_iso88591 + with + x -> + prerr_endline(string_of_exn x); + exit 1 +;; + + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial revision + * + * Revision 1.2 2000/08/24 09:42:52 gerd + * Updated a comment. + * + * Revision 1.1 2000/08/24 09:39:59 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml new file mode 100644 index 000000000..56f5fb69b --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/print.ml @@ -0,0 +1,60 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(* Read a record-list structure and print it *) +open Pxp_types;; +open Pxp_document;; +open Pxp_yacc;; + +let print tree = + iter_tree + ~pre: + (fun n -> + match n # node_type with + T_element "last-name" -> + print_endline ("Last name: " ^ n # data) + | T_element "first-name" -> + print_endline ("First name: " ^ n # data) + | T_element "phone" -> + print_endline ("Telephone number: " ^ n # data) + | _ -> + ()) + ~post: + (fun n -> + match n # node_type with + T_element "record" -> + print_newline() + | _ -> + ()) + tree +;; + +let main() = + try + let dtd = parse_dtd_entity default_config (from_file "record.dtd") in + let tree = + parse_content_entity default_config (from_channel stdin) dtd default_spec in + print tree + with + x -> + prerr_endline(string_of_exn x); + exit 1 +;; + + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial revision + * + * Revision 1.1 2000/08/22 21:57:43 gerd + * Initial revision. 
+ * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd b/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd new file mode 100644 index 000000000..b054ccd29 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/record.dtd @@ -0,0 +1,5 @@ + + + + + diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml new file mode 100644 index 000000000..00d36b09b --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sample.xml @@ -0,0 +1,18 @@ + + + + Stolpmann + Gerd + 997705 + + + Smith + Jack + 12345 + + + Ützgür + xxx + 7654 + + diff --git a/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml new file mode 100644 index 000000000..297730f66 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/simple_transformation/sort.ml @@ -0,0 +1,83 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +(* Read a record-list, sort it, and print it as XML *) +open Pxp_types;; +open Pxp_document;; +open Pxp_yacc;; + +let sort by tree = + map_tree + ~pre: + (fun n -> n # orphaned_flat_clone) + ~post: + (fun n -> + match n # node_type with + T_element "record-list" -> + let l = n # sub_nodes in + let l' = List.sort + (fun a b -> + let a_string = + try (find_element by a) # data + with Not_found -> "" in + let b_string = + try (find_element by b) # data + with Not_found -> "" in + Pervasives.compare a_string b_string) + l in + n # set_nodes l'; + n + | _ -> + n) + tree +;; + + +let main() = + let criterion = ref "last-name" in + Arg.parse + [ "-by", Arg.String (fun s -> criterion := s), + " (last-name|first-name|phone)"; + ] + (fun _ -> raise (Arg.Bad "Bad usage")) + "usage: sort [ options ]"; + if not(List.mem !criterion ["last-name"; "first-name"; "phone"]) then ( + prerr_endline ("Unknown criterion: " ^ !criterion); + exit 1 + ); + try + 
let dtd = parse_dtd_entity default_config (from_file "record.dtd") in + let tree = + parse_content_entity default_config (from_channel stdin) dtd default_spec + in + print_endline ""; + (sort !criterion tree) # write (Out_channel stdout) `Enc_iso88591 + with + x -> + prerr_endline(string_of_exn x); + exit 1 +;; + + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial revision + * + * Revision 1.3 2000/08/30 16:05:44 gerd + * Minor update + * + * Revision 1.2 2000/08/24 09:40:11 gerd + * Allow that columns are missing. + * + * Revision 1.1 2000/08/22 21:57:44 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore b/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore new file mode 100644 index 000000000..e125622dd --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/validate/.cvsignore @@ -0,0 +1,13 @@ +*.cmi +*.cmo +*.cma +*.cmx +*.o +*.a +*.cmxa +*.new +*.mlf +*.ml0 +depend +depend.pkg + diff --git a/helm/DEVEL/pxp/pxp/examples/validate/Makefile b/helm/DEVEL/pxp/pxp/examples/validate/Makefile new file mode 100644 index 000000000..64b691887 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/validate/Makefile @@ -0,0 +1,28 @@ +# make validate: make bytecode executable +# make validate.opt: make native executable +# make clean: remove intermediate files (in this directory) +# make CLEAN: remove intermediate files (recursively) +# make distclean: remove any superflous files (recursively) +#---------------------------------------------------------------------- + +pxpvalidate: validate.ml + ocamlfind ocamlc -o pxpvalidate -package "pxp" -linkpkg validate.ml + +pxpvalidate.opt: validate.ml + ocamlfind ocamlopt -o pxpvalidate.opt -package "pxp" -linkpkg validate.ml + +#---------------------------------------------------------------------- +.PHONY: all +all: + +.PHONY: clean +clean: + rm -f *.cmi *.cmo *.cma *.cmx *.o *.a 
*.cmxa + +.PHONY: CLEAN +CLEAN: clean + +.PHONY: distclean +distclean: clean + rm -f *~ + rm -f pxpvalidate pxpvalidate.opt diff --git a/helm/DEVEL/pxp/pxp/examples/validate/validate.ml b/helm/DEVEL/pxp/pxp/examples/validate/validate.ml new file mode 100644 index 000000000..3bb83d2d1 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/validate/validate.ml @@ -0,0 +1,126 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + + +open Pxp_document;; +open Pxp_yacc;; +open Pxp_types;; + +let error_happened = ref false;; + +let print_error e = + print_endline (string_of_exn e) +;; + +class warner = + object + method warn w = + print_endline ("WARNING: " ^ w) + end +;; + +let parse debug wf iso88591 filename = + try + (* Parse the document: *) + let parse_fn = + if wf then parse_wfdocument_entity + else + let index = new hash_index in + parse_document_entity + ?transform_dtd:None + ~id_index:(index :> 'ext index) + in + let doc = + parse_fn + { default_config with + debugging_mode = debug; + encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8; + idref_pass = true; + warner = new warner + } + (from_file filename) + default_spec + in + () + with + e -> + (* Print error; remember that there was an error *) + error_happened := true; + print_error e +;; + + +let main() = + let debug = ref false in + let wf = ref false in + let iso88591 = ref false in + let files = ref [] in + Arg.parse + [ "-d", Arg.Set debug, + " turn debugging mode on"; + "-wf", Arg.Set wf, + " check only on well-formedness"; + "-iso-8859-1", Arg.Set iso88591, + " use ISO-8859-1 as internal encoding instead of UTF-8"; + ] + (fun x -> files := x :: !files) + " +usage: pxpvalidate [options] file ... + +- checks the validity of XML documents. See below for list of options. 
+ +PXP - The XML parser for Objective Caml + +List of options:"; + files := List.rev !files; + List.iter (parse !debug !wf !iso88591) !files; +;; + + +main(); +if !error_happened then exit(1);; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:31 lpadovan + * Initial revision + * + * Revision 1.10 2000/08/30 15:58:41 gerd + * Updated. + * + * Revision 1.9 2000/07/14 14:57:30 gerd + * Updated: warner + * + * Revision 1.8 2000/07/14 14:13:15 gerd + * Cosmetic changes. + * + * Revision 1.7 2000/07/14 14:11:06 gerd + * Updated because of changes of the PXP API. + * + * Revision 1.6 2000/07/08 21:53:00 gerd + * Updated because of PXP interface changes. + * + * Revision 1.5 2000/06/04 20:21:55 gerd + * Updated to new module names. + * + * Revision 1.4 2000/05/01 16:44:57 gerd + * Added check for ID uniqueness. + * Using new error formatter. + * + * Revision 1.3 1999/11/09 22:27:30 gerd + * The programs returns now an exit code of 1 if one of the + * XML files produces an error. + * + * Revision 1.2 1999/09/01 23:09:56 gerd + * Added the option -wf that switches to well-formedness checking + * instead of validation. + * + * Revision 1.1 1999/08/14 22:20:53 gerd + * Initial revision. 
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore b/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore
new file mode 100644
index 000000000..e125622dd
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/.cvsignore
@@ -0,0 +1,13 @@
+*.cmi
+*.cmo
+*.cma
+*.cmx
+*.o
+*.a
+*.cmxa
+*.new
+*.mlf
+*.ml0
+depend
+depend.pkg
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile
new file mode 100644
index 000000000..5a0ba32b3
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile
@@ -0,0 +1,33 @@
+# make xmlforms: make bytecode executable
+# make xmlforms.opt: make native executable
+# make clean: remove intermediate files
+# make CLEAN: remove intermediate files (recursively)
+# make distclean: remove any superfluous files
+# make release: cleanup, create archive, tag CVS module
+# (for developers)
+#----------------------------------------------------------------------
+
+.PHONY: xmlforms
+xmlforms:
+	$(MAKE) -f Makefile.code xmlforms
+
+.PHONY: xmlforms.opt
+xmlforms.opt:
+	$(MAKE) -f Makefile.code xmlforms.opt
+
+
+.PHONY: clean
+clean:
+	rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
+
+.PHONY: CLEAN
+CLEAN: clean
+	$(MAKE) -C styles CLEAN
+
+.PHONY: distclean
+distclean: clean
+	rm -f *~ depend depend.pkg
+	rm -f xmlforms xmlforms.opt
+	$(MAKE) -C styles distclean
+
+
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code
new file mode 100644
index 000000000..f99674042
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/Makefile.code
@@ -0,0 +1,57 @@
+#----------------------------------------------------------------------
+# specific rules for this package:
+
+OBJECTS = ds_context.cmo ds_style.cmo
+XOBJECTS = $(OBJECTS:.cmo=.cmx)
+ARCHIVE = xmlforms.cma
+XARCHIVE = xmlforms.cmxa
+NAME = xmlforms
+REQUIRES = camltk str pxp
+
+xmlforms: $(ARCHIVE) ds_app.cmo
+	ocamlfind ocamlc -g -o xmlforms -custom -package "$(REQUIRES)" \
+		-linkpkg $(ARCHIVE) ds_app.cmo
+# Native executable; the target name must equal the "xmlforms.opt" goal that Makefile passes to this file.
+xmlforms.opt: $(XARCHIVE) ds_app.cmx
+	ocamlfind ocamlopt -o xmlforms.opt -custom -package "$(REQUIRES)" \
+		-linkpkg $(XARCHIVE) ds_app.cmx
+
+$(ARCHIVE): $(OBJECTS)
+	$(OCAMLC) -a -o $(ARCHIVE) $(OBJECTS)
+
+$(XARCHIVE): $(XOBJECTS)
+	$(OCAMLOPT) -a -o $(XARCHIVE) $(XOBJECTS)
+
+#----------------------------------------------------------------------
+# general rules:
+
+OPTIONS =
+OCAMLC = ocamlc -g $(OPTIONS) $(ROPTIONS)
+OCAMLOPT = ocamlopt -p $(OPTIONS) $(ROPTIONS)
+OCAMLDEP = ocamldep $(OPTIONS)
+OCAMLFIND = ocamlfind
+
+depend: *.ml *.mli
+	$(OCAMLDEP) *.ml *.mli >depend
+
+depend.pkg: Makefile
+	$(OCAMLFIND) use -p ROPTIONS= $(REQUIRES) >depend.pkg
+
+.SUFFIXES: .cmo .cmi .cmx .ml .mli .mll .mly
+
+.ml.cmx:
+	$(OCAMLOPT) -c $<
+
+.ml.cmo:
+	$(OCAMLC) -c $<
+
+.mli.cmi:
+	$(OCAMLC) -c $<
+
+.mll.ml:
+	ocamllex $<
+
+*.mli:
+
+include depend
+include depend.pkg
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/README b/helm/DEVEL/pxp/pxp/examples/xmlforms/README
new file mode 100644
index 000000000..806a4094a
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/README
@@ -0,0 +1,61 @@
+-----------------------------------------------------------------------------
+xmlforms
+-----------------------------------------------------------------------------
+
+THE IDEA:
+
+This example uses XML for two purposes:
+
+- The "story" and layout of the application is specified in XML
+- The data records are stored in XML
+
+An "application" is a set of "masks" or sequences of masks, and every mask
+is thought as a visible page of the application, containing layout
+elements and functional elements. Layout is specified in TeX-style using
+hboxes, vboxes, hspaces, vspaces. Functional elements are "entries" (input
+box for a string with one line), "textboxes" (input boxes with several
+lines), and buttons.
+
+See styles/ds-style.dtd for the DTD of an application specification, and
+the other xml files in this directory for examples.
+
+The entries and textboxes are bound to "slots", i.e. string variables. If
+the application is started, the slots are read from a file, and if the
+user presses a special "save" button, the slots are stored into this file.
+The format of this data file is again XML; the simplistic DTD can be found
+in styles/ds-object.dtd.
+
+
+THE IMPLEMENTATION:
+
+There is currently a mapping of the specifications to ocamltk, done by a
+program called "xmlforms".
+
+
+HOW TO COMPILE:
+
+It is assumed that "findlib" is present on your system; see ABOUT-FINDLIB
+in the toplevel directory.
+The findlib packages "pxp" (formerly "markup"), "camltk" and "str" must have been installed.
+
+- "make xmlforms" produces a bytecode executable "xmlforms"
+- "make xmlforms.opt" produces a native executable "xmlforms.opt"
+
+Note that you cannot start the executables directly:
+
+
+HOW TO START AN APPLICATION:
+
+As "xmlforms" is a generic executable, there is a simple mechanism to bind
+it to a specific instance of an application. For example, in the "styles"
+subdirectory there is the application specification "crazy-style.xml". To
+start it, make a symlink called "crazy" referring to the "xmlforms"
+binary, set the environment variable DATASHEETS to the directory where the
+DTDs and XML files can be found, and start "crazy":
+
+    ln -s ../xmlforms crazy
+    DATASHEETS=. crazy my-record.xml
+
+(If you do not set DATASHEETS, a default directory, normally
+"/opt/xmlforms/lib" is used.)
+ diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml new file mode 100644 index 000000000..55589ea59 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_app.ml @@ -0,0 +1,107 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Tk +open Pxp_types +open Pxp_document +open Pxp_yacc +open Ds_context +open Ds_style + + +let installdir = + try Sys.getenv "DATASHEETS" with + Not_found -> "/opt/xmlforms/lib" +let style_sysid = ref "" +let object_dtd_sysid = Filename.concat installdir "ds-object.dtd" +let object_dtd_root = "record" + + +let rec print_error e = + print_endline (string_of_exn e) +;; + + +let run f arg1 arg2 = + try f arg1 arg2 with + e -> print_error e +;; + + +let edit filename cmd = + (* read in style definition *) + let index = new hash_index in + let style = + parse_document_entity + ~id_index:(index :> 'ext index) + default_config + (from_file !style_sysid) + tag_map + in + let root = style # root in + root # extension # prepare (index :> 'ext index); + + let obj_dtd = + parse_dtd_entity + default_config + (from_file object_dtd_sysid) + in + obj_dtd # set_root object_dtd_root; + + let topframe = openTk() in + let context = new context filename obj_dtd index root topframe in + + Toplevel.configure topframe [ Width (Centimeters 20.0); + Height (Centimeters 12.0); + ]; + Pack.propagate_set topframe false; + Wm.title_set topframe cmd; + context # goto (root # extension # start_node_name); + mainLoop() +;; + + +let main() = + let cmd = Filename.basename Sys.argv.(0) in + match Sys.argv with + [| _; filename |] -> + style_sysid := Filename.concat installdir (cmd ^ "-style.xml"); + run edit filename cmd + | _ -> + prerr_endline ("usage: " ^ cmd ^ " filename"); + exit(1) +;; + +main();; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:32 lpadovan + * Initial 
revision + * + * Revision 1.6 2000/07/16 19:36:03 gerd + * Updated. + * + * Revision 1.5 2000/07/08 22:03:11 gerd + * Updates because of PXP interface changes. + * + * Revision 1.4 2000/06/04 20:29:19 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.3 2000/05/01 16:48:45 gerd + * Using the new error formatter. + * + * Revision 1.2 1999/12/17 21:34:29 gerd + * The name of the root element is set to "record" in the + * object_dtd; otherwise the parser would not check that the root + * element is the right element. + * + * Revision 1.1 1999/08/21 19:11:05 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml new file mode 100644 index 000000000..453ca00f0 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_context.ml @@ -0,0 +1,238 @@ +(* $Id$ + * ---------------------------------------------------------------------- + * + *) + +open Pxp_types +open Pxp_document +open Pxp_yacc + +let empty_record = new element_impl (Pxp_yacc.default_extension);; +let empty_dnode = new data_impl Pxp_yacc.default_extension;; + +class context the_filename the_obj_dtd the_index the_root the_topframe = + object (self) + val filename = the_filename + val obj_dtd = the_obj_dtd + val node_index = the_index + val mutable obj = empty_record # create_element + the_obj_dtd (T_element "record") [] + val root = the_root + val topframe = the_topframe + val mutable wdg = None + + val mutable history = ( [| |] : string array ) + val mutable index = 0 + + initializer + self # load_obj + + method obj = obj + + (* history *) + + method private leave_node = + begin match wdg with + None -> () + | Some w -> Tk.destroy w + end; + wdg <- None + + method private enter_node = + let where = history.(index) in + let n = + try node_index # find where with + Not_found -> failwith ("Mask not found: " ^ where) in + let w = n # extension # create_widget topframe self in + Tk.pack [w] (n # 
extension # pack_opts @ [ Tk.Expand true] ); + wdg <- Some w + + + + method previous = + if index > 0 then + index <- index - 1 + else + raise Not_found; + self # leave_node; + self # enter_node; + + + method next = + if index < Array.length history - 1 then + index <- index + 1 + else + raise Not_found; + self # leave_node; + self # enter_node; + + + method goto where = + assert (index <= Array.length history); + self # leave_node; + let persisting_history = + if index < Array.length history then + Array.sub history 0 (index+1) + else + history + in + history <- Array.concat [ persisting_history; [| where |] ]; + index <- Array.length history - 1; + self # enter_node; + + + method current = + if index < Array.length history then + history.(index) + else + raise Not_found + + + (* read, write the slots of object *) + + method search_slot name = + let rec search n = + match n # node_type with + T_element "string" -> + if n # required_string_attribute "name" = name then + n + else raise Not_found + | T_element _ -> + search_list (n # sub_nodes) + | T_data -> + raise Not_found + | _ -> + assert false + + and search_list l = + match l with + x :: l' -> + (try search x with Not_found -> search_list l') + | [] -> + raise Not_found + in + search obj + + method get_slot name = + let d = (self # search_slot name) # data in + d + + method set_slot name value = + let dtd = obj # dtd in + begin try + let n = self # search_slot name in + n # delete + with + Not_found -> () + end; + let e_string = empty_record # create_element dtd (T_element "string") + [ "name", name ] in + let dnode = empty_dnode # create_data dtd value in + e_string # add_node dnode; + e_string # local_validate(); + obj # add_node e_string; + assert(self # get_slot name = value) + + (* load, save object *) + + + method load_obj = + if Sys.file_exists filename then begin + obj <- parse_content_entity + default_config + (from_file filename) + obj_dtd + default_spec + end + else begin + print_string "New 
file!\n"; flush stdout
+      end
+
+  (* Serialize [obj] to [filename] as XML text; the channel is closed on success and on error. *)
+  method save_obj =
+    let fd = open_out filename in
+    try
+      (* Escape markup characters; "&" is replaced first so the entities added afterwards are not escaped again. *)
+      let re1 = Str.regexp "&" in
+      let re2 = Str.regexp "<" in
+      let re3 = Str.regexp "'" in
+      let re4 = Str.regexp ">" in
+      let protect s =
+        let s1 = Str.global_replace re1 "&amp;" s in
+        let s2 = Str.global_replace re2 "&lt;" s1 in
+        let s3 = Str.global_replace re3 "&apos;" s2 in
+        let s4 = Str.global_replace re4 "&gt;" s3 in
+        s4
+      in
+
+      let rec iterate (n : 'node extension node as 'node) =
+        match n # node_type with
+            T_data ->
+              output_string fd (protect (n # data))
+          | T_element name ->
+              output_string fd ("<" ^ name ^ "\n");
+              let anames = n # attribute_names in
+              List.iter
+                (fun aname ->
+                   let aval = n # attribute aname in
+                   let v =
+                     match aval with
+                         Value s ->
+                           aname ^ "='" ^ protect s ^ "'\n"
+                       | Valuelist l ->
+                           aname ^ "='" ^ String.concat " " (List.map protect l) ^ "'\n"
+                       | Implied_value ->
+                           ""
+                   in
+                   output_string fd v)
+                anames;
+              output_string fd ">";
+              List.iter iterate (n # sub_nodes);
+              output_string fd ("</" ^ name ^ "\n>");
+          | _ ->
+              assert false
+      in
+
+      output_string fd "\n";
+      iterate obj;
+      close_out fd
+    with
+        e ->
+          close_out fd;
+          raise e
+
+  end
+;;
+
+
+(* ======================================================================
+ * History:
+ *
+ * $Log$
+ * Revision 1.1 2000/11/17 09:57:31 lpadovan
+ * Initial revision
+ *
+ * Revision 1.7 2000/08/30 15:58:49 gerd
+ * Updated.
+ *
+ * Revision 1.6 2000/07/23 20:25:05 gerd
+ * Update because of API change: local_validate.
+ *
+ * Revision 1.5 2000/07/16 19:36:03 gerd
+ * Updated.
+ *
+ * Revision 1.4 2000/07/08 22:03:11 gerd
+ * Updates because of PXP interface changes.
+ *
+ * Revision 1.3 2000/06/04 20:29:19 gerd
+ * Updates because of renamed PXP modules.
+ *
+ * Revision 1.2 2000/05/30 00:09:08 gerd
+ * Minor fix.
+ *
+ * Revision 1.1 1999/08/21 19:11:05 gerd
+ * Initial revision.
+ *
+ *
+ *)
diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml
new file mode 100644
index 000000000..08d0daa03
--- /dev/null
+++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/ds_style.ml
@@ -0,0 +1,778 @@
+(* $Id$
+ * ----------------------------------------------------------------------
+ *
+ *)
+
+open Pxp_types
+open Pxp_document
+open Ds_context
+
+(* Parse a length such as "12px" or "2.5 cm" into the matching Tk unit; fails with "Bad dimension: ..." if s does not match. *)
+let get_dimension s =
+  let re = Str.regexp "\\([0-9]*\\(\\.[0-9]+\\)?\\)[ \t\n]*\\(px\\|cm\\|in\\|mm\\|pt\\)" in
+  if Str.string_match re s 0 then begin
+    let number = Str.matched_group 1 s in
+    let dim = Str.matched_group 3 s in
+    match dim with
+        "px" -> Tk.Pixels (int_of_float (float_of_string number))
+      | "cm" -> Tk.Centimeters (float_of_string number)
+      | "in" -> Tk.Inches (float_of_string number)
+      | "mm" -> Tk.Millimeters (float_of_string number)
+      | "pt" -> Tk.PrinterPoint (float_of_string number)
+      | _ -> assert false
+  end
+  else
+    failwith ("Bad dimension: " ^ s)
+;;
+
+
+class virtual shared =
+  object(self)
+
+    (* --- default_ext --- *)
+
+    val mutable node = (None : shared node option)
+
+    method clone = {< >}
+    method node =
+      match node with
+          None ->
+            assert false
+        | Some n -> n
+    method set_node n =
+      node <- Some n
+
+    (* --- shared attributes: color & font settings --- *)
+
+    val mutable fgcolor = (None : string option)
+    val mutable bgcolor = (None : string option)
+    val mutable font = (None : string option)
+
+    method fgcolor =
+      (* Get the foreground color: If there is a local value, return it;
+       * otherwise ask parent node
+       *)
+      match fgcolor with
+          Some c -> c
+        | None -> try self # node # parent # extension # fgcolor with
+            Not_found -> failwith "#fgcolor"
+
+    method bgcolor =
+      (* Get the background color: If there is a local value, return it;
+       * otherwise ask parent node
+       *)
+      match bgcolor with
+          Some c -> c
+        | None -> try self # node # parent # extension # bgcolor with
+            Not_found -> failwith "#bgcolor"
+
+    method font =
+      (* 
Get the current font: If there is a local value, return it; + * otherwise ask parent node + *) + match font with + Some c -> c + | None -> try self # node # parent # extension # font with + Not_found -> failwith "#font" + + method private init_color_and_font = + let get_color n = + try + match self # node # attribute n with + Value v -> Some v + | Implied_value -> None + | _ -> assert false + with Not_found -> None in + fgcolor <- get_color "fgcolor"; + bgcolor <- get_color "bgcolor"; + font <- get_color "font"; (* sic! *) + + + method private bg_color_opt = + [ Tk.Background (Tk.NamedColor (self # bgcolor)) ] + + method private fg_color_opt = + [ Tk.Foreground (Tk.NamedColor (self # fgcolor)) ] + + method private font_opt = + [ Tk.Font (self # font) ] + + (* --- virtual --- *) + + method virtual prepare : shared Pxp_yacc.index -> unit + method virtual create_widget : Widget.widget -> context -> Widget.widget + + method pack_opts = ( [] : Tk.options list ) + method xstretchable = false + method ystretchable = false + + method accept (c:context) = () + + method private get_mask = + (* find parent which is a mask *) + let rec search n = + match n # node_type with + T_element "mask" -> + n # extension + | T_element _ -> + search (n # parent) + | _ -> + assert false + in + search (self # node) + + + method private accept_mask (c:context) = + let rec iterate n = + n # extension # accept c; + List.iter iterate (n # sub_nodes) + in + iterate (self # get_mask # node) + + + method start_node_name = + (failwith "#start_node_name" : string) + + (* --- debug --- *) + + method private name = + let nt = self # node # node_type in + match nt with + T_element n -> n + | T_data -> "#PCDATA" + | _ -> assert false + + end +;; + + +class default = + object (self) + inherit shared + + method prepare idx = + self # init_color_and_font + + method create_widget w c = + failwith "default # create_widget" + end +;; + + +let dummy_node = new element_impl (new default);; + +class application 
= + object (self) + inherit shared + + val mutable start_node = dummy_node + + method prepare idx = + (* prepare this node *) + self # init_color_and_font; + if fgcolor = None then fgcolor <- Some "black"; + if bgcolor = None then bgcolor <- Some "white"; + if font = None then font <- Some "fixed"; + let start = + match self # node # attribute "start" with + Value v -> v + | _ -> assert false in + start_node <- (try idx # find start with + Not_found -> failwith "Start node not found"); + (* iterate over the subtree *) + let rec iterate n = + n # extension # prepare idx; + List.iter iterate (n # sub_nodes) + in + List.iter iterate (self # node # sub_nodes) + + + method start_node_name = + match self # node # attribute "start" with + Value v -> v + | _ -> assert false + + method create_widget w c = + start_node # extension # create_widget w c + + method pack_opts = + start_node # extension # pack_opts + end +;; + + +class sequence = + object (self) + inherit shared + + method prepare idx = + self # init_color_and_font; + + method create_widget w c = + let node = List.hd (self # node # sub_nodes) in + node # extension # create_widget w c + + method pack_opts = + let node = List.hd (self # node # sub_nodes) in + node # extension # pack_opts + end +;; + + +class vbox = + object (self) + inherit shared + + val mutable att_halign = "left" + + method prepare idx = + self # init_color_and_font; + match self # node # attribute "halign" with + Value v -> att_halign <- v + | _ -> assert false + + method create_widget w c = + let f = Frame.create w (self # bg_color_opt) in + let nodes = self # node # sub_nodes in + let options = + match att_halign with + "left" -> [ Tk.Anchor Tk.W ] + | "right" -> [ Tk.Anchor Tk.E ] + | "center" -> [ Tk.Anchor Tk.Center ] + | _ -> assert false + in + List.iter + (fun n -> + let opts = n # extension # pack_opts in + let wdg = n # extension # create_widget f c in + Tk.pack [wdg] (options @ opts); + ) + nodes; + f + + method pack_opts = + match 
self # xstretchable, self # ystretchable with + true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ] + | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ] + | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ] + | false, false -> [] + + method xstretchable = + let nodes = self # node # sub_nodes in + List.exists (fun n -> n # extension # xstretchable) nodes + + method ystretchable = + let nodes = self # node # sub_nodes in + List.exists (fun n -> n # extension # ystretchable) nodes + + end + +;; + + +class mask = + object (self) + + inherit vbox + + method prepare idx = + self # init_color_and_font; + att_halign <- "left" + end +;; + + +class hbox = + object (self) + inherit shared + + val mutable att_width = None + val mutable att_halign = "left" + val mutable att_valign = "top" + + method prepare idx = + self # init_color_and_font; + begin match self # node # attribute "halign" with + Value v -> att_halign <- v + | _ -> assert false + end; + begin match self # node # attribute "valign" with + Value v -> att_valign <- v + | _ -> assert false + end; + begin match self # node # attribute "width" with + Value v -> att_width <- Some (get_dimension v) + | Implied_value -> att_width <- None + | _ -> assert false + end + + method create_widget w c = + let f1 = Frame.create w (self # bg_color_opt) in + let f_extra = + match att_width with + None -> [] + | Some wd -> + [ Canvas.create f1 + ( [ Tk.Width wd; Tk.Height (Tk.Pixels 0); + Tk.Relief Tk.Flat; + Tk.HighlightThickness (Tk.Pixels 0); + ] @ + self # bg_color_opt ) ] + in + let f2 = Frame.create f1 (self # bg_color_opt) in + let nodes = self # node # sub_nodes in + + let outer_pack_opts = + match att_halign with + "left" -> [ Tk.Anchor Tk.W ] + | "right" -> [ Tk.Anchor Tk.E ] + | "center" -> [ Tk.Anchor Tk.Center ] + | _ -> assert false + in + let inner_pack_opts = + match att_valign with + "top" -> [ Tk.Anchor Tk.N ] + | "bottom" -> [ Tk.Anchor Tk.S ] + | "center" -> [ Tk.Anchor Tk.Center ] + 
| _ -> assert false + in + List.iter + (fun n -> + let opts = n # extension # pack_opts in + let wdg = n # extension # create_widget f2 c in + Tk.pack [wdg] (inner_pack_opts @ [ Tk.Side Tk.Side_Left ] @ opts); + ) + nodes; + let extra_opts = self # pack_opts in + Tk.pack (f_extra @ [f2]) (outer_pack_opts @ extra_opts); + f1 + + method pack_opts = + match self # xstretchable, self # ystretchable with + true, false -> [ Tk.Fill Tk.Fill_X; (* Tk.Expand true *) ] + | false, true -> [ Tk.Fill Tk.Fill_Y; (* Tk.Expand true *) ] + | true, true -> [ Tk.Fill Tk.Fill_Both; (* Tk.Expand true *) ] + | false, false -> [] + + method xstretchable = + let nodes = self # node # sub_nodes in + List.exists (fun n -> n # extension # xstretchable) nodes + + method ystretchable = + let nodes = self # node # sub_nodes in + List.exists (fun n -> n # extension # ystretchable) nodes + + end +;; + +class vspace = + object (self) + inherit shared + + val mutable att_height = Tk.Pixels 0 + val mutable att_fill = false + + method prepare idx = + self # init_color_and_font; + begin match self # node # attribute "height" with + Value v -> att_height <- get_dimension v + | _ -> assert false + end; + begin match self # node # attribute "fill" with + Value "yes" -> att_fill <- true + | Value "no" -> att_fill <- false + | _ -> assert false + end + + + method create_widget w c = + let f = Frame.create w ( self # bg_color_opt ) in + let strut = + Canvas.create f + ( [ Tk.Height att_height; Tk.Width (Tk.Pixels 0); + Tk.Relief Tk.Flat; + Tk.HighlightThickness (Tk.Pixels 0); + ] @ + self # bg_color_opt ) in + if att_fill then + Tk.pack [strut] [Tk.Fill Tk.Fill_Y; Tk.Expand true] + else + Tk.pack [strut] []; + f + + method pack_opts = + if att_fill then [ Tk.Fill Tk.Fill_Y; Tk.Expand true ] else [] + + method ystretchable = att_fill + end +;; + +class hspace = + object (self) + inherit shared + + + val mutable att_width = Tk.Pixels 0 + val mutable att_fill = false + + method prepare idx = + self # 
init_color_and_font; + begin match self # node # attribute "width" with + Value v -> att_width <- get_dimension v + | _ -> assert false + end; + begin match self # node # attribute "fill" with + Value "yes" -> att_fill <- true + | Value "no" -> att_fill <- false + | _ -> assert false + end + + + method create_widget w c = + let f = Frame.create w ( self # bg_color_opt ) in + let strut = + Canvas.create f + ( [ Tk.Width att_width; Tk.Height (Tk.Pixels 0); + Tk.Relief Tk.Flat; + Tk.HighlightThickness (Tk.Pixels 0); + ] @ + self # bg_color_opt ) in + if att_fill then + Tk.pack [strut] [Tk.Fill Tk.Fill_X; Tk.Expand true] + else + Tk.pack [strut] []; + f + + method pack_opts = + if att_fill then [ Tk.Fill Tk.Fill_X; Tk.Expand true ] else [] + + method xstretchable = att_fill + end +;; + +class label = + object (self) + inherit shared + + val mutable att_textwidth = (-1) + val mutable att_halign = "left" + + method prepare idx = + self # init_color_and_font; + att_textwidth <- (match self # node # attribute "textwidth" with + Value v -> + let w = try int_of_string v + with _ -> failwith ("Not an integer: " ^ v) in + w + | Implied_value -> + (-1) + | _ -> assert false); + att_halign <- (match self # node # attribute "halign" with + Value v -> v + | _ -> assert false); + + + method create_widget w c = + let opts_textwidth = if att_textwidth < 0 then [] else + [ Tk.TextWidth att_textwidth ] in + let opts_halign = + match att_halign with + "left" -> [ Tk.Anchor Tk.W ] + | "right" -> [ Tk.Anchor Tk.E ] + | "center" -> [ Tk.Anchor Tk.Center ] + | _ -> assert false + in + let opts_content = + [ Tk.Text (self # node # data) ] in + let label = Label.create w (opts_textwidth @ opts_halign @ + opts_content @ self # bg_color_opt @ + self # fg_color_opt @ self # font_opt) in + label + + end +;; + +class entry = + object (self) + inherit shared + + val mutable tv = lazy (Textvariable.create()) + val mutable att_textwidth = (-1) + val mutable att_slot = "" + + method prepare idx = + 
self # init_color_and_font; + tv <- lazy (Textvariable.create()); + att_textwidth <- (match self # node # attribute "textwidth" with + Value v -> + let w = try int_of_string v + with _ -> failwith ("Not an integer: " ^ v) in + w + | Implied_value -> + (-1) + | _ -> assert false); + att_slot <- (match self # node # attribute "slot" with + Value v -> v + | _ -> assert false); + + method create_widget w c = + let opts_textwidth = if att_textwidth < 0 then [] else + [ Tk.TextWidth att_textwidth ] in + let e = Entry.create w ( [ Tk.TextVariable (Lazy.force tv) ] @ + self # fg_color_opt @ + self # bg_color_opt @ + self # font_opt @ + opts_textwidth + ) in + let s = + try c # get_slot att_slot with + Not_found -> self # node # data in + Textvariable.set (Lazy.force tv) s; + e + + method accept c = + c # set_slot att_slot (Textvariable.get (Lazy.force tv)) + + end +;; + +(* textbox: Tk Text widget with a vertical scrollbar, packed into a frame. The textwidth, textheight and slot attributes are all optional. When a slot is named the text is loaded from and stored back to that slot; otherwise the widget shows the node data and is disabled. *) class textbox = + object (self) + inherit shared + + val mutable att_textwidth = (-1) + val mutable att_textheight = (-1) + val mutable att_slot = "" + val mutable last_widget = None (* Text widget built by the latest create_widget call; accept reads its contents back *) + + method prepare idx = (* Caches the attributes: an implied textwidth or textheight is stored as -1, an implied slot as the empty string; a size that is not an integer raises Failure *) + self # init_color_and_font; + att_textwidth <- (match self # node # attribute "textwidth" with + Value v -> + let w = try int_of_string v + with _ -> failwith ("Not an integer: " ^ v) in + w + | Implied_value -> + (-1) + | _ -> assert false); + att_textheight <- (match self # node # attribute "textheight" with + Value v -> + let w = try int_of_string v + with _ -> failwith ("Not an integer: " ^ v) in + w + | Implied_value -> + (-1) + | _ -> assert false); + att_slot <- (match self # node # attribute "slot" with + Value v -> v + | Implied_value -> "" + | _ -> assert false); + + + method create_widget w c = (* Returns the frame f holding the text widget and its scrollbar. A negative size means the attribute was not given, so no size option is passed to Tk. *) + let opts_textwidth = if att_textwidth < 0 then [] else + [ Tk.TextWidth att_textwidth ] in + let opts_textheight = if att_textheight < 0 then [] else + [ Tk.TextHeight att_textheight ] in + let f = Frame.create w (self # bg_color_opt) in + let vscrbar = Scrollbar.create f [ Tk.Orient Tk.Vertical ] 
in + let e = Text.create f ( [ ] @ + self # fg_color_opt @ + self # bg_color_opt @ + self # font_opt @ + opts_textwidth @ opts_textheight + ) in + last_widget <- Some e; (* remembered so that accept can read the text back *) + (* connect scrollbar and text widget to each other *) Scrollbar.configure vscrbar [ Tk.ScrollCommand + (fun s -> Text.yview e s); + Tk.Width (Tk.Pixels 9) ]; + Text.configure e [ Tk.YScrollCommand + (fun a b -> Scrollbar.set vscrbar a b) ]; + (* initial text: the slot contents when a slot is named and c has it, otherwise self # node # data *) let s = + if att_slot <> "" then + try c # get_slot att_slot with + Not_found -> self # node # data + else + self # node # data + in + (* Text.insert appends always a newline to the last line; so strip + * an existing newline first + *) + let s' = + if s <> "" & s.[String.length s - 1] = '\n' then + String.sub s 0 (String.length s - 1) + else + s in + Text.insert e (Tk.TextIndex(Tk.End,[])) s' []; + (* accept stores nothing when no slot is named, so in that case the widget is disabled *) if att_slot = "" then + Text.configure e [ Tk.State Tk.Disabled ]; + Tk.pack [e] [ Tk.Side Tk.Side_Left ]; + Tk.pack [vscrbar] [ Tk.Side Tk.Side_Left; Tk.Fill Tk.Fill_Y ]; + f + + method accept c = (* Copies the widget text from line 1, character 0 up to End into the slot; does nothing when no slot is named or no widget has been created yet *) + if att_slot <> "" then + match last_widget with + None -> () + | Some w -> + let s = + Text.get + w + (Tk.TextIndex(Tk.LineChar(1,0),[])) + (Tk.TextIndex(Tk.End,[])) in + c # set_slot att_slot s + + end +;; + +(* button: prepare caches the label, action and goto attributes (label and action must carry a value, goto may be implied) and validates the action *) class button = + object (self) + inherit shared + + val mutable att_label = "" + val mutable att_action = "" + val mutable att_goto = "" + + method prepare idx = + self # init_color_and_font; + att_label <- (match self # node # attribute "label" with + Value v -> v + | _ -> assert false); + att_action <- (match self # node # attribute "action" with + Value v -> v + | _ -> assert false); + att_goto <- (match self # node # attribute "goto" with + Value v -> v + | Implied_value -> "" + | _ -> assert false); + (* a goto action must name a target that idx can find *) if att_action = "goto" then begin + try let _ = idx # find att_goto in () with + Not_found -> failwith ("Target `" ^ att_goto ^ "' not found") + end; + (* list-prev and list-next require the parent of the mask node to be a sequence element *) if att_action = "list-prev" or att_action = "list-next" then begin + let m = self # get_mask in + if m # node # parent # node_type <> T_element "sequence" then + 
(* list-prev and list-next step between the sub nodes of the parent of the mask, so the mask must be a direct child of a <sequence> element *) failwith ("action " ^ att_action ^ " must not be used out of <sequence>"); + end; + + + method create_widget w c = (* Builds the Tk button labelled att_label; when clicked, cmd first runs accept_mask and then dispatches on att_action *) + let cmd () = + self # accept_mask c; + match att_action with + "goto" -> + c # goto att_goto + | "save" -> + c # save_obj + | "exit" -> + Protocol.closeTk() + | "save-exit" -> + c # save_obj; + Protocol.closeTk() + | "list-prev" -> (* go to the sub node of the parent that immediately precedes this mask, using its name attribute; no-op when there is none *) + let m = self # get_mask # node in + let s = m # parent in + let rec search l = + match l with + x :: y :: l' -> + if y == m then + match x # attribute "name" with + Value s -> c # goto s + | _ -> assert false + else + search (y :: l') + | _ -> () + in + search (s # sub_nodes) + | "list-next" -> (* go to the sub node of the parent that immediately follows this mask, using its name attribute; no-op when there is none *) + let m = self # get_mask # node in + let s = m # parent in + let rec search l = + match l with + x :: y :: l' -> + if x == m then + match y # attribute "name" with + Value s -> c # goto s + | _ -> assert false + else + search (y :: l') + | _ -> () + in + search (s # sub_nodes) + | "hist-prev" -> + (try c # previous with Not_found -> ()) + | "hist-next" -> + (try c # next with Not_found -> ()) + | _ -> () (* any other action does nothing beyond accept_mask *) + in + let b = Button.create w ( [ Tk.Text att_label; Tk.Command cmd ] @ + self # fg_color_opt @ + self # bg_color_opt @ + self # font_opt ) in + b + + + end +;; + + +(**********************************************************************) + +open Pxp_yacc + +(* tag_map: associates each listed element type name with an element exemplar carrying the class of the same name; data nodes and all other elements get the default class *) let tag_map = + make_spec_from_mapping + ~data_exemplar:(new data_impl (new default)) + ~default_element_exemplar:(new element_impl (new default)) + ~element_mapping: + (let m = Hashtbl.create 50 in + Hashtbl.add m "application" + (new element_impl (new application)); + Hashtbl.add m "sequence" + (new element_impl (new sequence)); + Hashtbl.add m "mask" + (new element_impl (new mask)); + Hashtbl.add m "vbox" + (new element_impl (new vbox)); + Hashtbl.add m "hbox" + (new element_impl (new hbox)); + Hashtbl.add m "vspace" + (new element_impl (new vspace)); + Hashtbl.add m "hspace" + (new element_impl (new hspace)); + Hashtbl.add m "label" + (new element_impl (new label)); + Hashtbl.add 
m "entry" + (new element_impl (new entry)); + Hashtbl.add m "textbox" + (new element_impl (new textbox)); + Hashtbl.add m "button" + (new element_impl (new button)); + m) + () +;; + +(* ====================================================================== + * History: + * + * $Log$ + * Revision 1.1 2000/11/17 09:57:31 lpadovan + * Initial revision + * + * Revision 1.5 2000/08/30 15:58:49 gerd + * Updated. + * + * Revision 1.4 2000/07/16 19:36:03 gerd + * Updated. + * + * Revision 1.3 2000/07/08 22:03:11 gerd + * Updates because of PXP interface changes. + * + * Revision 1.2 2000/06/04 20:29:19 gerd + * Updates because of renamed PXP modules. + * + * Revision 1.1 1999/08/21 19:11:05 gerd + * Initial revision. + * + * + *) diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile new file mode 100644 index 000000000..c0068a59d --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/Makefile @@ -0,0 +1,16 @@ +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: CLEAN +CLEAN: clean + +.PHONY: distclean +distclean: clean + rm -f *~ + +.PHONY: symlinks +symlinks: + for x in *-style.xml; do ln -s ../xmlforms $${x%-style.xml} || true; done diff --git a/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml new file mode 100644 index 000000000..d3af5daa0 --- /dev/null +++ b/helm/DEVEL/pxp/pxp/examples/xmlforms/styles/address-style.xml @@ -0,0 +1,361 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + '> + + + + + + + + + + + + + '> + + + + + + + + + + + + + '> + + + + + + + + + + + +